mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-16 18:26:42 +00:00
for-6.3/block-2023-02-16
-----BEGIN PGP SIGNATURE-----
iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmPvfncQHGF4Ym9lQGtl
cm5lbC5kawAKCRD301j7KXHgpob2EADXJxcr2jjYHm/7cjKkyuVX8fr80dNdMeuY
JFdsjG1k6Uj73BVhQQWYTcs/PsrWBHWRsv6uz4WgOELj55eXmf5Q0kJszyUeJW33
/DjqLvtoppVcYf80xE13wKvCfn73BjwQo6xkGM0qAYn15eaXiD/Ax3xC6eJlsBeK
PEw7EJyhacbSxZa/1D2B6+mqII1jUQWProTCc3udZ4JHi3WvdWa3Rda0qCqHl4a1
+K2aP2YTFIRPxBzfMNa/CafWVIFubTdht+4Ds6R60RImzB9e0VUBfcsiUyW5Zg7L
Fwv7ptXuWrALwVNdW56Oz1QikBxn2pdRR2HMLwKJW1MD8kP9r8LMm2jV5Rhiwe0B
OQsGRYkOzBvw+bxeP5fvk0iPGVMz6ActH4gkraA5QdLqayDaFYOadlhqz0uRo5SH
Fb42Vl658K/MHDSIk8U58TNkmrsIJsBGohXI9DOGINPPvv3XOPi4Q1HmXkGRmii0
y+lNU/QEGh7xXXew29SPP76uQpQaYfC7NxXCMw/OpOMwehzjsjshmM2lpxi8zsgt
PJUmfHv5qxCplNmTJXmUpmX7sS7550HUdu9FJb13DM+gzKg8bk9jWVuLrzqrVlG5
1hKWEl1+heg1heRfaIuJVLbPI0au6Sb4uqhih/PHyrP9TWIoAruDbDJM65GKTxyE
2uEgcHzHQw==
=poRc
-----END PGP SIGNATURE-----

Merge tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux

Pull block updates from Jens Axboe:

 - NVMe updates via Christoph:
      - Small improvements to the logging functionality (Amit Engel)
      - Authentication cleanups (Hannes Reinecke)
      - Cleanup and optimize the DMA mapping code in the PCIe driver
        (Keith Busch)
      - Work around the command effects for Format NVM (Keith Busch)
      - Misc cleanups (Keith Busch, Christoph Hellwig)
      - Fix and cleanup freeing single sgl (Keith Busch)

 - MD updates via Song:
      - Fix a rare crash during the takeover process
      - Don't update recovery_cp when curr_resync is ACTIVE
      - Free writes_pending in md_stop
      - Change active_io to percpu

 - Updates to drbd, inching us closer to unifying the out-of-tree driver
   with the in-tree one (Andreas, Christoph, Lars, Robert)

 - BFQ update adding support for multi-actuator drives (Paolo, Federico,
   Davide)

 - Make brd compliant with REQ_NOWAIT (me)

 - Fix for IOPOLL and queue entering, fixing stalled IO waiting on
   timeouts (me)

 - Fix for REQ_NOWAIT with multiple bios (me)

 - Fix memory leak in blktrace cleanup (Greg)

 - Clean up sbitmap and fix a potential hang (Kemeng)

 - Clean up some bits in BFQ, and fix a bug in the request injection
   (Kemeng)

 - Clean up the request allocation and issue code, and fix some bugs
   related to that (Kemeng)

 - ublk updates and fixes:
      - Add support for unprivileged ublk (Ming)
      - Improve device deletion handling (Ming)
      - Misc (Liu, Ziyang)

 - s390 dasd fixes (Alexander, Qiheng)

 - Improve utility of request caching and fixes (Anuj, Xiao)

 - zoned cleanups (Pankaj)

 - More constification for kobjs (Thomas)

 - blk-iocost cleanups (Yu)

 - Remove bio splitting from drivers that don't need it (Christoph)

 - Switch blk-cgroups to use struct gendisk. Some of this is now
   incomplete as select late reverts were done. (Christoph)

 - Add bvec initialization helpers, and convert callers to use that
   rather than open-coding it (Christoph)

 - Misc fixes and cleanups (Jinke, Keith, Arnd, Bart, Li, Martin,
   Matthew, Ulf, Zhong)

* tag 'for-6.3/block-2023-02-16' of git://git.kernel.dk/linux: (169 commits)
  brd: use radix_tree_maybe_preload instead of radix_tree_preload
  block: use proper return value from bio_failfast()
  block: bio-integrity: Copy flags when bio_integrity_payload is cloned
  block: Fix io statistics for cgroup in throttle path
  brd: mark as nowait compatible
  brd: check for REQ_NOWAIT and set correct page allocation mask
  brd: return 0/-error from brd_insert_page()
  block: sync mixed merged request's failfast with 1st bio's
  Revert "blk-cgroup: pin the gendisk in struct blkcg_gq"
  Revert "blk-cgroup: pass a gendisk to blkg_lookup"
  Revert "blk-cgroup: delay blk-cgroup initialization until add_disk"
  Revert "blk-cgroup: delay calling blkcg_exit_disk until disk_release"
  Revert "blk-cgroup: move the cgroup information to struct gendisk"
  nvme-pci: remove iod use_sgls
  nvme-pci: fix freeing single sgl
  block: ublk: check IO buffer based on flag need_get_data
  s390/dasd: Fix potential memleak in dasd_eckd_init()
  s390/dasd: sort out physical vs virtual pointers usage
  block: Remove the ALLOC_CACHE_SLACK constant
  block: make kobj_type structures constant
  ...
This commit is contained in: commit 5b0ed59649
@@ -432,7 +432,8 @@ Contact:	linux-block@vger.kernel.org
Description:
		[RW] This is the maximum number of kilobytes that the block
		layer will allow for a filesystem request. Must be smaller than
		or equal to the maximum size allowed by the hardware.
		or equal to the maximum size allowed by the hardware. Write 0
		to use default kernel settings.


What:		/sys/block/<disk>/queue/max_segment_size
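The attribute described in the hunk above is /sys/block/<disk>/queue/max_sectors_kb in the current ABI file. A minimal userspace sketch (not from the kernel tree; the device name "sda" is only an example, and the write needs root) that reads the current limit and then writes 0 to fall back to the kernel default, as the new text documents:

#include <stdio.h>

int main(void)
{
	const char *path = "/sys/block/sda/queue/max_sectors_kb";
	unsigned long kb;
	FILE *f = fopen(path, "r+");

	if (!f)
		return 1;
	if (fscanf(f, "%lu", &kb) == 1)
		printf("current limit: %lu kB\n", kb);
	rewind(f);
	fprintf(f, "0\n");	/* 0 = go back to the default kernel setting */
	fclose(f);
	return 0;
}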
@@ -1,10 +0,0 @@
===============================
Generic Block Device Capability
===============================

This file documents the sysfs file ``block/<disk>/capability``.

``capability`` is a bitfield, printed in hexadecimal, indicating which
capabilities a specific block device supports:

.. kernel-doc:: include/linux/blkdev.h
@@ -10,7 +10,6 @@ Block
   bfq-iosched
   biovecs
   blk-mq
   capability
   cmdline-partition
   data-integrity
   deadline-iosched
@@ -144,6 +144,43 @@ managing and controlling ublk devices with help of several control commands:
  For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's
  responsibility to save IO target specific info in userspace.

- ``UBLK_CMD_GET_DEV_INFO2``
  Same purpose as ``UBLK_CMD_GET_DEV_INFO``, but the ublk server has to
  provide the path of the char device (``/dev/ublkc*``) so the kernel can
  run a permission check. This command was added to support unprivileged
  ublk devices and is introduced together with ``UBLK_F_UNPRIVILEGED_DEV``.
  Only the user owning the requested device can retrieve the device info.

  How to deal with userspace/kernel compatibility:

  1) if the kernel is capable of handling ``UBLK_F_UNPRIVILEGED_DEV``

     If the ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:

     The ublk server should send ``UBLK_CMD_GET_DEV_INFO2``: an unprivileged
     application may need to query devices the current user owns at any
     time, and it has no idea whether ``UBLK_F_UNPRIVILEGED_DEV`` is set
     because the capability info is stateless, so it should always retrieve
     the info via ``UBLK_CMD_GET_DEV_INFO2``.

     If the ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:

     ``UBLK_CMD_GET_DEV_INFO`` is always sent to the kernel, and the
     ``UBLK_F_UNPRIVILEGED_DEV`` feature isn't available to the user.

  2) if the kernel isn't capable of handling ``UBLK_F_UNPRIVILEGED_DEV``

     If the ublk server supports ``UBLK_F_UNPRIVILEGED_DEV``:

     ``UBLK_CMD_GET_DEV_INFO2`` is tried first and will fail; then
     ``UBLK_CMD_GET_DEV_INFO`` needs to be retried, given that
     ``UBLK_F_UNPRIVILEGED_DEV`` can't be set.

     If the ublk server doesn't support ``UBLK_F_UNPRIVILEGED_DEV``:

     ``UBLK_CMD_GET_DEV_INFO`` is always sent to the kernel, and the
     ``UBLK_F_UNPRIVILEGED_DEV`` feature isn't available to the user.

- ``UBLK_CMD_START_USER_RECOVERY``

  This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
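A minimal sketch of the GET_DEV_INFO2 fallback described in the hunk above. The two wrappers ublk_ctrl_get_dev_info2()/ublk_ctrl_get_dev_info() are hypothetical stand-ins for issuing the respective control commands through /dev/ublk-control; they are not part of the kernel UAPI or of any existing library:

struct ublksrv_ctrl_dev_info;

extern int ublk_ctrl_get_dev_info2(int dev_id,
				   struct ublksrv_ctrl_dev_info *info);
extern int ublk_ctrl_get_dev_info(int dev_id,
				  struct ublksrv_ctrl_dev_info *info);

int ublk_query_dev_info(int dev_id, struct ublksrv_ctrl_dev_info *info)
{
	int ret;

	/* Preferred: also works for unprivileged devices on new kernels. */
	ret = ublk_ctrl_get_dev_info2(dev_id, info);
	if (ret >= 0)
		return ret;

	/*
	 * Presumably an old kernel that doesn't know GET_DEV_INFO2 (a real
	 * server would check the specific error); retry the legacy command.
	 */
	return ublk_ctrl_get_dev_info(dev_id, info);
}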
@@ -180,6 +217,15 @@ managing and controlling ublk devices with help of several control commands:
  double-write since the driver may issue the same I/O request twice. It
  might be useful to a read-only FS or a VM backend.

Unprivileged ublk device is supported by passing ``UBLK_F_UNPRIVILEGED_DEV``.
Once the flag is set, all control commands can be sent by an unprivileged
user. Except for ``UBLK_CMD_ADD_DEV``, a permission check on the specified
char device (``/dev/ublkc*``) is done by the ublk driver for all other
control commands; to do that, the path of the char device has to be
provided in these commands' payload by the ublk server. In this way, the
ublk device becomes container-aware, and a device created in one container
can be controlled/accessed only inside that container.

Data plane
----------
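Illustration only (not the driver's code path): before sending unprivileged control commands, a ublk server might sanity-check that the calling user can access the per-device char node, mirroring the ownership requirement described above. The kernel's actual permission check is done against the char-device path carried in the command payload:

#include <stdio.h>
#include <unistd.h>

/* Returns 1 if the calling user can access /dev/ublkc<dev_id>. */
static int ublk_dev_accessible(int dev_id)
{
	char path[64];

	snprintf(path, sizeof(path), "/dev/ublkc%d", dev_id);
	return access(path, R_OK | W_OK) == 0;
}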
@@ -254,15 +300,6 @@ with specified IO tag in the command data:
Future development
==================

Container-aware ublk device
----------------------------

The ublk driver doesn't handle any IO logic. Its function is well defined
for now, and only very limited userspace interfaces are needed, which are
also well defined. It is possible to make ublk devices container-aware block
devices in future as Stefan Hajnoczi suggested [#stefan]_, by removing
ADMIN privilege.

Zero copy
---------
@@ -6425,6 +6425,7 @@ T:	git git://git.linbit.com/linux-drbd.git
T:	git git://git.linbit.com/drbd-8.4.git
F:	Documentation/admin-guide/blockdev/
F:	drivers/block/drbd/
F:	include/linux/drbd*
F:	lib/lru_cache.c

DRIVER COMPONENT FRAMEWORK
@@ -30,6 +30,7 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
	bool "BFQ hierarchical scheduling support"
	depends on IOSCHED_BFQ && BLK_CGROUP
	default y
	select BLK_CGROUP_RWSTAT
	help

@ -513,12 +513,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd)
|
||||
kfree(cpd_to_bfqgd(cpd));
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
|
||||
struct blkcg *blkcg)
|
||||
static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk,
|
||||
struct blkcg *blkcg, gfp_t gfp)
|
||||
{
|
||||
struct bfq_group *bfqg;
|
||||
|
||||
bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
|
||||
bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id);
|
||||
if (!bfqg)
|
||||
return NULL;
|
||||
|
||||
@ -551,7 +551,6 @@ static void bfq_pd_init(struct blkg_policy_data *pd)
|
||||
bfqg->bfqd = bfqd;
|
||||
bfqg->active_entities = 0;
|
||||
bfqg->num_queues_with_pending_reqs = 0;
|
||||
bfqg->online = true;
|
||||
bfqg->rq_pos_tree = RB_ROOT;
|
||||
}
|
||||
|
||||
@ -614,7 +613,7 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio)
|
||||
continue;
|
||||
}
|
||||
bfqg = blkg_to_bfqg(blkg);
|
||||
if (bfqg->online) {
|
||||
if (bfqg->pd.online) {
|
||||
bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
|
||||
return bfqg;
|
||||
}
|
||||
@ -706,12 +705,52 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
bfq_activate_bfqq(bfqd, bfqq);
|
||||
}
|
||||
|
||||
if (!bfqd->in_service_queue && !bfqd->rq_in_driver)
|
||||
if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver)
|
||||
bfq_schedule_dispatch(bfqd);
|
||||
/* release extra ref taken above, bfqq may happen to be freed now */
|
||||
bfq_put_queue(bfqq);
|
||||
}
|
||||
|
||||
static void bfq_sync_bfqq_move(struct bfq_data *bfqd,
|
||||
struct bfq_queue *sync_bfqq,
|
||||
struct bfq_io_cq *bic,
|
||||
struct bfq_group *bfqg,
|
||||
unsigned int act_idx)
|
||||
{
|
||||
struct bfq_queue *bfqq;
|
||||
|
||||
if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
|
||||
/* We are the only user of this bfqq, just move it */
|
||||
if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
|
||||
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The queue was merged to a different queue. Check
|
||||
* that the merge chain still belongs to the same
|
||||
* cgroup.
|
||||
*/
|
||||
for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
|
||||
if (bfqq->entity.sched_data != &bfqg->sched_data)
|
||||
break;
|
||||
if (bfqq) {
|
||||
/*
|
||||
* Some queue changed cgroup so the merge is not valid
|
||||
* anymore. We cannot easily just cancel the merge (by
|
||||
* clearing new_bfqq) as there may be other processes
|
||||
* using this queue and holding refs to all queues
|
||||
* below sync_bfqq->new_bfqq. Similarly if the merge
|
||||
* already happened, we need to detach from bfqq now
|
||||
* so that we cannot merge bio to a request from the
|
||||
* old cgroup.
|
||||
*/
|
||||
bfq_put_cooperator(sync_bfqq);
|
||||
bic_set_bfqq(bic, NULL, true, act_idx);
|
||||
bfq_release_process_ref(bfqd, sync_bfqq);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* __bfq_bic_change_cgroup - move @bic to @bfqg.
|
||||
* @bfqd: the queue descriptor.
|
||||
@ -726,53 +765,20 @@ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd,
|
||||
struct bfq_io_cq *bic,
|
||||
struct bfq_group *bfqg)
|
||||
{
|
||||
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false);
|
||||
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true);
|
||||
struct bfq_entity *entity;
|
||||
unsigned int act_idx;
|
||||
|
||||
if (async_bfqq) {
|
||||
entity = &async_bfqq->entity;
|
||||
for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) {
|
||||
struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx);
|
||||
struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx);
|
||||
|
||||
if (entity->sched_data != &bfqg->sched_data) {
|
||||
bic_set_bfqq(bic, NULL, false);
|
||||
if (async_bfqq &&
|
||||
async_bfqq->entity.sched_data != &bfqg->sched_data) {
|
||||
bic_set_bfqq(bic, NULL, false, act_idx);
|
||||
bfq_release_process_ref(bfqd, async_bfqq);
|
||||
}
|
||||
}
|
||||
|
||||
if (sync_bfqq) {
|
||||
if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
|
||||
/* We are the only user of this bfqq, just move it */
|
||||
if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
|
||||
bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
|
||||
} else {
|
||||
struct bfq_queue *bfqq;
|
||||
|
||||
/*
|
||||
* The queue was merged to a different queue. Check
|
||||
* that the merge chain still belongs to the same
|
||||
* cgroup.
|
||||
*/
|
||||
for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
|
||||
if (bfqq->entity.sched_data !=
|
||||
&bfqg->sched_data)
|
||||
break;
|
||||
if (bfqq) {
|
||||
/*
|
||||
* Some queue changed cgroup so the merge is
|
||||
* not valid anymore. We cannot easily just
|
||||
* cancel the merge (by clearing new_bfqq) as
|
||||
* there may be other processes using this
|
||||
* queue and holding refs to all queues below
|
||||
* sync_bfqq->new_bfqq. Similarly if the merge
|
||||
* already happened, we need to detach from
|
||||
* bfqq now so that we cannot merge bio to a
|
||||
* request from the old cgroup.
|
||||
*/
|
||||
bfq_put_cooperator(sync_bfqq);
|
||||
bic_set_bfqq(bic, NULL, true);
|
||||
bfq_release_process_ref(bfqd, sync_bfqq);
|
||||
}
|
||||
}
|
||||
if (sync_bfqq)
|
||||
bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx);
|
||||
}
|
||||
}
|
||||
|
||||
@ -978,7 +984,6 @@ static void bfq_pd_offline(struct blkg_policy_data *pd)
|
||||
|
||||
put_async_queues:
|
||||
bfq_put_async_queues(bfqd, bfqg);
|
||||
bfqg->online = false;
|
||||
|
||||
spin_unlock_irqrestore(&bfqd->lock, flags);
|
||||
/*
|
||||
@ -1284,7 +1289,7 @@ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = blkcg_activate_policy(bfqd->queue, &blkcg_policy_bfq);
|
||||
ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq);
|
||||
if (ret)
|
||||
return NULL;
|
||||
|
||||
|
File diff suppressed because it is too large
@ -33,6 +33,14 @@
|
||||
*/
|
||||
#define BFQ_SOFTRT_WEIGHT_FACTOR 100
|
||||
|
||||
/*
|
||||
* Maximum number of actuators supported. This constant is used simply
|
||||
* to define the size of the static array that will contain
|
||||
* per-actuator data. The current value is hopefully a good upper
|
||||
* bound to the possible number of actuators of any actual drive.
|
||||
*/
|
||||
#define BFQ_MAX_ACTUATORS 8
|
||||
|
||||
struct bfq_entity;
|
||||
|
||||
/**
|
||||
@ -227,12 +235,14 @@ struct bfq_ttime {
|
||||
* struct bfq_queue - leaf schedulable entity.
|
||||
*
|
||||
* A bfq_queue is a leaf request queue; it can be associated with an
|
||||
* io_context or more, if it is async or shared between cooperating
|
||||
* processes. @cgroup holds a reference to the cgroup, to be sure that it
|
||||
* does not disappear while a bfqq still references it (mostly to avoid
|
||||
* races between request issuing and task migration followed by cgroup
|
||||
* destruction).
|
||||
* All the fields are protected by the queue lock of the containing bfqd.
|
||||
* io_context or more, if it is async or shared between cooperating
|
||||
* processes. Besides, it contains I/O requests for only one actuator
|
||||
* (an io_context is associated with a different bfq_queue for each
|
||||
* actuator it generates I/O for). @cgroup holds a reference to the
|
||||
* cgroup, to be sure that it does not disappear while a bfqq still
|
||||
* references it (mostly to avoid races between request issuing and
|
||||
* task migration followed by cgroup destruction). All the fields are
|
||||
* protected by the queue lock of the containing bfqd.
|
||||
*/
|
||||
struct bfq_queue {
|
||||
/* reference counter */
|
||||
@ -397,24 +407,18 @@ struct bfq_queue {
|
||||
* the woken queues when this queue exits.
|
||||
*/
|
||||
struct hlist_head woken_list;
|
||||
|
||||
/* index of the actuator this queue is associated with */
|
||||
unsigned int actuator_idx;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct bfq_io_cq - per (request_queue, io_context) structure.
|
||||
*/
|
||||
struct bfq_io_cq {
|
||||
/* associated io_cq structure */
|
||||
struct io_cq icq; /* must be the first member */
|
||||
/* array of two process queues, the sync and the async */
|
||||
struct bfq_queue *bfqq[2];
|
||||
/* per (request_queue, blkcg) ioprio */
|
||||
int ioprio;
|
||||
#ifdef CONFIG_BFQ_GROUP_IOSCHED
|
||||
uint64_t blkcg_serial_nr; /* the current blkcg serial */
|
||||
#endif
|
||||
* struct bfq_data - bfqq data unique and persistent for associated bfq_io_cq
|
||||
*/
|
||||
struct bfq_iocq_bfqq_data {
|
||||
/*
|
||||
* Snapshot of the has_short_time flag before merging; taken
|
||||
* to remember its value while the queue is merged, so as to
|
||||
* to remember its values while the queue is merged, so as to
|
||||
* be able to restore it in case of split.
|
||||
*/
|
||||
bool saved_has_short_ttime;
|
||||
@ -428,7 +432,7 @@ struct bfq_io_cq {
|
||||
u64 saved_tot_idle_time;
|
||||
|
||||
/*
|
||||
* Same purpose as the previous fields for the value of the
|
||||
* Same purpose as the previous fields for the values of the
|
||||
* field keeping the queue's belonging to a large burst
|
||||
*/
|
||||
bool saved_in_large_burst;
|
||||
@@ -466,6 +470,38 @@ struct bfq_io_cq {
	struct bfq_queue *stable_merge_bfqq;

	bool stably_merged; /* non splittable if true */
};

/**
 * struct bfq_io_cq - per (request_queue, io_context) structure.
 */
struct bfq_io_cq {
	/* associated io_cq structure */
	struct io_cq icq; /* must be the first member */
	/*
	 * Matrix of associated process queues: first row for async
	 * queues, second row sync queues. Each row contains one
	 * column for each actuator. An I/O request generated by the
	 * process is inserted into the queue pointed by bfqq[i][j] if
	 * the request is to be served by the j-th actuator of the
	 * drive, where i==0 or i==1, depending on whether the request
	 * is async or sync. So there is a distinct queue for each
	 * actuator.
	 */
	struct bfq_queue *bfqq[2][BFQ_MAX_ACTUATORS];
	/* per (request_queue, blkcg) ioprio */
	int ioprio;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	uint64_t blkcg_serial_nr; /* the current blkcg serial */
#endif

	/*
	 * Persistent data for associated synchronous process queues
	 * (one queue per actuator, see field bfqq above). In
	 * particular, each of these queues may undergo a merge.
	 */
	struct bfq_iocq_bfqq_data bfqq_data[BFQ_MAX_ACTUATORS];

	unsigned int requests; /* Number of requests this process has in flight */
};
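A minimal sketch of the indexing convention documented in the comment above; the helper name is made up for illustration and is not the in-tree accessor:

/*
 * Sketch only: row 0 holds async queues, row 1 sync queues, one column
 * per actuator.
 */
static inline struct bfq_queue *bic_bfqq_of(struct bfq_io_cq *bic,
					    bool is_sync,
					    unsigned int act_idx)
{
	return bic->bfqq[is_sync][act_idx];
}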
@ -554,7 +590,12 @@ struct bfq_data {
|
||||
/* number of queued requests */
|
||||
int queued;
|
||||
/* number of requests dispatched and waiting for completion */
|
||||
int rq_in_driver;
|
||||
int tot_rq_in_driver;
|
||||
/*
|
||||
* number of requests dispatched and waiting for completion
|
||||
* for each actuator
|
||||
*/
|
||||
int rq_in_driver[BFQ_MAX_ACTUATORS];
|
||||
|
||||
/* true if the device is non rotational and performs queueing */
|
||||
bool nonrot_with_queueing;
|
||||
@ -648,8 +689,13 @@ struct bfq_data {
|
||||
/* maximum budget allotted to a bfq_queue before rescheduling */
|
||||
int bfq_max_budget;
|
||||
|
||||
/* list of all the bfq_queues active on the device */
|
||||
struct list_head active_list;
|
||||
/*
|
||||
* List of all the bfq_queues active for a specific actuator
|
||||
* on the device. Keeping active queues separate on a
|
||||
* per-actuator basis helps implementing per-actuator
|
||||
* injection more efficiently.
|
||||
*/
|
||||
struct list_head active_list[BFQ_MAX_ACTUATORS];
|
||||
/* list of all the bfq_queues idle on the device */
|
||||
struct list_head idle_list;
|
||||
|
||||
@ -723,8 +769,6 @@ struct bfq_data {
|
||||
* is multiplied.
|
||||
*/
|
||||
unsigned int bfq_wr_coeff;
|
||||
/* maximum duration of a weight-raising period (jiffies) */
|
||||
unsigned int bfq_wr_max_time;
|
||||
|
||||
/* Maximum weight-raising duration for soft real-time processes */
|
||||
unsigned int bfq_wr_rt_max_time;
|
||||
@@ -772,6 +816,42 @@ struct bfq_data {
	 */
	unsigned int word_depths[2][2];
	unsigned int full_depth_shift;

	/*
	 * Number of independent actuators. This is equal to 1 in
	 * case of single-actuator drives.
	 */
	unsigned int num_actuators;
	/*
	 * Disk independent access ranges for each actuator
	 * in this device.
	 */
	sector_t sector[BFQ_MAX_ACTUATORS];
	sector_t nr_sectors[BFQ_MAX_ACTUATORS];
	struct blk_independent_access_range ia_ranges[BFQ_MAX_ACTUATORS];

	/*
	 * If the number of I/O requests queued in the device for a
	 * given actuator is below the next threshold, then the actuator
	 * is deemed as underutilized. If this condition is found to
	 * hold for some actuator upon a dispatch, but (i) the
	 * in-service queue does not contain I/O for that actuator,
	 * while (ii) some other queue does contain I/O for that
	 * actuator, then the head I/O request of the latter queue is
	 * returned (injected), instead of the head request of the
	 * currently in-service queue.
	 *
	 * We set the threshold, empirically, to the minimum possible
	 * value for which an actuator is fully utilized, or close to
	 * be fully utilized. By doing so, injected I/O 'steals' as
	 * few drive-queue slots as possible from the in-service
	 * queue. This reduces as much as possible the probability
	 * that the service of I/O from the in-service bfq_queue gets
	 * delayed because of slot exhaustion, i.e., because all the
	 * slots of the drive queue are filled with I/O injected from
	 * other queues (NCQ provides for 32 slots).
	 */
	unsigned int actuator_load_threshold;
};

enum bfqq_state_flags {
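A sketch of how the per-actuator ranges above can be used to map a request's start sector to the actuator that serves it; this mirrors the idea of the series but is not necessarily the exact in-tree helper:

/*
 * Sketch only: find the actuator whose access range contains @sec,
 * falling back to the first actuator if no range matches.
 */
static unsigned int bfqd_actuator_of(struct bfq_data *bfqd, sector_t sec)
{
	unsigned int i;

	for (i = 0; i < bfqd->num_actuators; i++)
		if (sec >= bfqd->sector[i] &&
		    sec < bfqd->sector[i] + bfqd->nr_sectors[i])
			return i;

	return 0;
}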
@ -929,16 +1009,14 @@ struct bfq_group {
|
||||
|
||||
/* reference counter (see comments in bfq_bic_update_cgroup) */
|
||||
refcount_t ref;
|
||||
/* Is bfq_group still online? */
|
||||
bool online;
|
||||
|
||||
struct bfq_entity entity;
|
||||
struct bfq_sched_data sched_data;
|
||||
|
||||
struct bfq_data *bfqd;
|
||||
|
||||
struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS];
|
||||
struct bfq_queue *async_idle_bfqq;
|
||||
struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS][BFQ_MAX_ACTUATORS];
|
||||
struct bfq_queue *async_idle_bfqq[BFQ_MAX_ACTUATORS];
|
||||
|
||||
struct bfq_entity *my_entity;
|
||||
|
||||
@ -955,8 +1033,8 @@ struct bfq_group {
|
||||
struct bfq_entity entity;
|
||||
struct bfq_sched_data sched_data;
|
||||
|
||||
struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS];
|
||||
struct bfq_queue *async_idle_bfqq;
|
||||
struct bfq_queue *async_bfqq[2][IOPRIO_NR_LEVELS][BFQ_MAX_ACTUATORS];
|
||||
struct bfq_queue *async_idle_bfqq[BFQ_MAX_ACTUATORS];
|
||||
|
||||
struct rb_root rq_pos_tree;
|
||||
};
|
||||
@ -969,8 +1047,10 @@ struct bfq_group {
|
||||
|
||||
extern const int bfq_timeout;
|
||||
|
||||
struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
|
||||
void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
|
||||
struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync,
|
||||
unsigned int actuator_idx);
|
||||
void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync,
|
||||
unsigned int actuator_idx);
|
||||
struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
|
||||
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
|
||||
void bfq_weights_tree_add(struct bfq_queue *bfqq);
|
||||
|
@ -493,7 +493,7 @@ static void bfq_active_insert(struct bfq_service_tree *st,
|
||||
bfq_update_active_tree(node);
|
||||
|
||||
if (bfqq)
|
||||
list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list);
|
||||
list_add(&bfqq->bfqq_list, &bfqq->bfqd->active_list[bfqq->actuator_idx]);
|
||||
|
||||
bfq_inc_active_entities(entity);
|
||||
}
|
||||
|
@ -124,23 +124,18 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int offset)
|
||||
{
|
||||
struct bio_integrity_payload *bip = bio_integrity(bio);
|
||||
struct bio_vec *iv;
|
||||
|
||||
if (bip->bip_vcnt >= bip->bip_max_vcnt) {
|
||||
printk(KERN_ERR "%s: bip_vec full\n", __func__);
|
||||
return 0;
|
||||
}
|
||||
|
||||
iv = bip->bip_vec + bip->bip_vcnt;
|
||||
|
||||
if (bip->bip_vcnt &&
|
||||
bvec_gap_to_prev(&bdev_get_queue(bio->bi_bdev)->limits,
|
||||
&bip->bip_vec[bip->bip_vcnt - 1], offset))
|
||||
return 0;
|
||||
|
||||
iv->bv_page = page;
|
||||
iv->bv_len = len;
|
||||
iv->bv_offset = offset;
|
||||
bvec_set_page(&bip->bip_vec[bip->bip_vcnt], page, len, offset);
|
||||
bip->bip_vcnt++;
|
||||
|
||||
return len;
|
||||
@ -418,6 +413,7 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
|
||||
|
||||
bip->bip_vcnt = bip_src->bip_vcnt;
|
||||
bip->bip_iter = bip_src->bip_iter;
|
||||
bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
block/bio.c
@ -26,7 +26,6 @@
|
||||
#include "blk-cgroup.h"
|
||||
|
||||
#define ALLOC_CACHE_THRESHOLD 16
|
||||
#define ALLOC_CACHE_SLACK 64
|
||||
#define ALLOC_CACHE_MAX 256
|
||||
|
||||
struct bio_alloc_cache {
|
||||
@ -1029,10 +1028,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
|
||||
if (bio->bi_vcnt >= queue_max_segments(q))
|
||||
return 0;
|
||||
|
||||
bvec = &bio->bi_io_vec[bio->bi_vcnt];
|
||||
bvec->bv_page = page;
|
||||
bvec->bv_len = len;
|
||||
bvec->bv_offset = offset;
|
||||
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, offset);
|
||||
bio->bi_vcnt++;
|
||||
bio->bi_iter.bi_size += len;
|
||||
return len;
|
||||
@ -1108,15 +1104,10 @@ EXPORT_SYMBOL_GPL(bio_add_zone_append_page);
|
||||
void __bio_add_page(struct bio *bio, struct page *page,
|
||||
unsigned int len, unsigned int off)
|
||||
{
|
||||
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt];
|
||||
|
||||
WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
|
||||
WARN_ON_ONCE(bio_full(bio, len));
|
||||
|
||||
bv->bv_page = page;
|
||||
bv->bv_offset = off;
|
||||
bv->bv_len = len;
|
||||
|
||||
bvec_set_page(&bio->bi_io_vec[bio->bi_vcnt], page, len, off);
|
||||
bio->bi_iter.bi_size += len;
|
||||
bio->bi_vcnt++;
|
||||
}
|
||||
@ -1792,6 +1783,8 @@ static int __init init_bio(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
BUILD_BUG_ON(BIO_FLAG_LAST > 8 * sizeof_field(struct bio, bi_flags));
|
||||
|
||||
bio_integrity_init();
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(bvec_slabs); i++) {
|
||||
|
@ -118,14 +118,26 @@ static void blkg_free_workfn(struct work_struct *work)
|
||||
{
|
||||
struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
|
||||
free_work);
|
||||
struct request_queue *q = blkg->q;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* pd_free_fn() can also be called from blkcg_deactivate_policy(),
|
||||
* in order to make sure pd_free_fn() is called in order, the deletion
|
||||
* of the list blkg->q_node is delayed to here from blkg_destroy(), and
|
||||
* blkcg_mutex is used to synchronize blkg_free_workfn() and
|
||||
* blkcg_deactivate_policy().
|
||||
*/
|
||||
mutex_lock(&q->blkcg_mutex);
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++)
|
||||
if (blkg->pd[i])
|
||||
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
|
||||
if (blkg->parent)
|
||||
blkg_put(blkg->parent);
|
||||
list_del_init(&blkg->q_node);
|
||||
mutex_unlock(&q->blkcg_mutex);
|
||||
|
||||
if (blkg->q)
|
||||
blk_put_queue(blkg->q);
|
||||
blk_put_queue(q);
|
||||
free_percpu(blkg->iostat_cpu);
|
||||
percpu_ref_exit(&blkg->refcnt);
|
||||
kfree(blkg);
|
||||
@ -158,8 +170,6 @@ static void __blkg_release(struct rcu_head *rcu)
|
||||
|
||||
/* release the blkcg and parent blkg refs this blkg has been holding */
|
||||
css_put(&blkg->blkcg->css);
|
||||
if (blkg->parent)
|
||||
blkg_put(blkg->parent);
|
||||
blkg_free(blkg);
|
||||
}
|
||||
|
||||
@ -249,16 +259,13 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
|
||||
blkg = kzalloc_node(sizeof(*blkg), gfp_mask, disk->queue->node);
|
||||
if (!blkg)
|
||||
return NULL;
|
||||
|
||||
if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
|
||||
goto err_free;
|
||||
|
||||
goto out_free_blkg;
|
||||
blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
|
||||
if (!blkg->iostat_cpu)
|
||||
goto err_free;
|
||||
|
||||
goto out_exit_refcnt;
|
||||
if (!blk_get_queue(disk->queue))
|
||||
goto err_free;
|
||||
goto out_free_iostat;
|
||||
|
||||
blkg->q = disk->queue;
|
||||
INIT_LIST_HEAD(&blkg->q_node);
|
||||
@ -281,19 +288,28 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
|
||||
continue;
|
||||
|
||||
/* alloc per-policy data and attach it to blkg */
|
||||
pd = pol->pd_alloc_fn(gfp_mask, disk->queue, blkcg);
|
||||
pd = pol->pd_alloc_fn(disk, blkcg, gfp_mask);
|
||||
if (!pd)
|
||||
goto err_free;
|
||||
|
||||
goto out_free_pds;
|
||||
blkg->pd[i] = pd;
|
||||
pd->blkg = blkg;
|
||||
pd->plid = i;
|
||||
pd->online = false;
|
||||
}
|
||||
|
||||
return blkg;
|
||||
|
||||
err_free:
|
||||
blkg_free(blkg);
|
||||
out_free_pds:
|
||||
while (--i >= 0)
|
||||
if (blkg->pd[i])
|
||||
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
|
||||
blk_put_queue(disk->queue);
|
||||
out_free_iostat:
|
||||
free_percpu(blkg->iostat_cpu);
|
||||
out_exit_refcnt:
|
||||
percpu_ref_exit(&blkg->refcnt);
|
||||
out_free_blkg:
|
||||
kfree(blkg);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -359,8 +375,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
|
||||
if (blkg->pd[i] && pol->pd_online_fn)
|
||||
pol->pd_online_fn(blkg->pd[i]);
|
||||
if (blkg->pd[i]) {
|
||||
if (pol->pd_online_fn)
|
||||
pol->pd_online_fn(blkg->pd[i]);
|
||||
blkg->pd[i]->online = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
blkg->online = true;
|
||||
@ -376,7 +395,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, struct gendisk *disk,
|
||||
err_put_css:
|
||||
css_put(&blkcg->css);
|
||||
err_free_blkg:
|
||||
blkg_free(new_blkg);
|
||||
if (new_blkg)
|
||||
blkg_free(new_blkg);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
@ -458,21 +478,28 @@ static void blkg_destroy(struct blkcg_gq *blkg)
|
||||
lockdep_assert_held(&blkg->q->queue_lock);
|
||||
lockdep_assert_held(&blkcg->lock);
|
||||
|
||||
/* Something wrong if we are trying to remove same group twice */
|
||||
WARN_ON_ONCE(list_empty(&blkg->q_node));
|
||||
WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
|
||||
/*
|
||||
* blkg stays on the queue list until blkg_free_workfn(), see details in
|
||||
* blkg_free_workfn(), hence this function can be called from
|
||||
* blkcg_destroy_blkgs() first and again from blkg_destroy_all() before
|
||||
* blkg_free_workfn().
|
||||
*/
|
||||
if (hlist_unhashed(&blkg->blkcg_node))
|
||||
return;
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
struct blkcg_policy *pol = blkcg_policy[i];
|
||||
|
||||
if (blkg->pd[i] && pol->pd_offline_fn)
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
if (blkg->pd[i] && blkg->pd[i]->online) {
|
||||
blkg->pd[i]->online = false;
|
||||
if (pol->pd_offline_fn)
|
||||
pol->pd_offline_fn(blkg->pd[i]);
|
||||
}
|
||||
}
|
||||
|
||||
blkg->online = false;
|
||||
|
||||
radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
|
||||
list_del_init(&blkg->q_node);
|
||||
hlist_del_init_rcu(&blkg->blkcg_node);
|
||||
|
||||
/*
|
||||
@ -559,7 +586,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
|
||||
|
||||
const char *blkg_dev_name(struct blkcg_gq *blkg)
|
||||
{
|
||||
if (!blkg->q->disk || !blkg->q->disk->bdi->dev)
|
||||
if (!blkg->q->disk)
|
||||
return NULL;
|
||||
return bdi_dev_name(blkg->q->disk->bdi);
|
||||
}
|
||||
@ -1273,6 +1300,7 @@ int blkcg_init_disk(struct gendisk *disk)
|
||||
int ret;
|
||||
|
||||
INIT_LIST_HEAD(&q->blkg_list);
|
||||
mutex_init(&q->blkcg_mutex);
|
||||
|
||||
new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
|
||||
if (!new_blkg)
|
||||
@ -1349,9 +1377,9 @@ static void blkcg_bind(struct cgroup_subsys_state *root_css)
|
||||
|
||||
static void blkcg_exit(struct task_struct *tsk)
|
||||
{
|
||||
if (tsk->throttle_queue)
|
||||
blk_put_queue(tsk->throttle_queue);
|
||||
tsk->throttle_queue = NULL;
|
||||
if (tsk->throttle_disk)
|
||||
put_disk(tsk->throttle_disk);
|
||||
tsk->throttle_disk = NULL;
|
||||
}
|
||||
|
||||
struct cgroup_subsys io_cgrp_subsys = {
|
||||
@ -1377,14 +1405,14 @@ struct cgroup_subsys io_cgrp_subsys = {
|
||||
EXPORT_SYMBOL_GPL(io_cgrp_subsys);
|
||||
|
||||
/**
|
||||
* blkcg_activate_policy - activate a blkcg policy on a request_queue
|
||||
* @q: request_queue of interest
|
||||
* blkcg_activate_policy - activate a blkcg policy on a gendisk
|
||||
* @disk: gendisk of interest
|
||||
* @pol: blkcg policy to activate
|
||||
*
|
||||
* Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through
|
||||
* Activate @pol on @disk. Requires %GFP_KERNEL context. @disk goes through
|
||||
* bypass mode to populate its blkgs with policy_data for @pol.
|
||||
*
|
||||
* Activation happens with @q bypassed, so nobody would be accessing blkgs
|
||||
* Activation happens with @disk bypassed, so nobody would be accessing blkgs
|
||||
* from IO path. Update of each blkg is protected by both queue and blkcg
|
||||
* locks so that holding either lock and testing blkcg_policy_enabled() is
|
||||
* always enough for dereferencing policy data.
|
||||
@ -1392,9 +1420,9 @@ EXPORT_SYMBOL_GPL(io_cgrp_subsys);
|
||||
* The caller is responsible for synchronizing [de]activations and policy
|
||||
* [un]registerations. Returns 0 on success, -errno on failure.
|
||||
*/
|
||||
int blkcg_activate_policy(struct request_queue *q,
|
||||
const struct blkcg_policy *pol)
|
||||
int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
struct blkg_policy_data *pd_prealloc = NULL;
|
||||
struct blkcg_gq *blkg, *pinned_blkg = NULL;
|
||||
int ret;
|
||||
@ -1419,8 +1447,8 @@ retry:
|
||||
pd = pd_prealloc;
|
||||
pd_prealloc = NULL;
|
||||
} else {
|
||||
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
|
||||
blkg->blkcg);
|
||||
pd = pol->pd_alloc_fn(disk, blkg->blkcg,
|
||||
GFP_NOWAIT | __GFP_NOWARN);
|
||||
}
|
||||
|
||||
if (!pd) {
|
||||
@ -1437,8 +1465,8 @@ retry:
|
||||
|
||||
if (pd_prealloc)
|
||||
pol->pd_free_fn(pd_prealloc);
|
||||
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
|
||||
blkg->blkcg);
|
||||
pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg,
|
||||
GFP_KERNEL);
|
||||
if (pd_prealloc)
|
||||
goto retry;
|
||||
else
|
||||
@ -1448,6 +1476,7 @@ retry:
|
||||
blkg->pd[pol->plid] = pd;
|
||||
pd->blkg = blkg;
|
||||
pd->plid = pol->plid;
|
||||
pd->online = false;
|
||||
}
|
||||
|
||||
/* all allocated, init in the same order */
|
||||
@ -1455,9 +1484,11 @@ retry:
|
||||
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
|
||||
pol->pd_init_fn(blkg->pd[pol->plid]);
|
||||
|
||||
if (pol->pd_online_fn)
|
||||
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
|
||||
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
|
||||
if (pol->pd_online_fn)
|
||||
pol->pd_online_fn(blkg->pd[pol->plid]);
|
||||
blkg->pd[pol->plid]->online = true;
|
||||
}
|
||||
|
||||
__set_bit(pol->plid, q->blkcg_pols);
|
||||
ret = 0;
|
||||
@ -1492,16 +1523,17 @@ enomem:
|
||||
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
|
||||
|
||||
/**
|
||||
* blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
|
||||
* @q: request_queue of interest
|
||||
* blkcg_deactivate_policy - deactivate a blkcg policy on a gendisk
|
||||
* @disk: gendisk of interest
|
||||
* @pol: blkcg policy to deactivate
|
||||
*
|
||||
* Deactivate @pol on @q. Follows the same synchronization rules as
|
||||
* Deactivate @pol on @disk. Follows the same synchronization rules as
|
||||
* blkcg_activate_policy().
|
||||
*/
|
||||
void blkcg_deactivate_policy(struct request_queue *q,
|
||||
void blkcg_deactivate_policy(struct gendisk *disk,
|
||||
const struct blkcg_policy *pol)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
struct blkcg_gq *blkg;
|
||||
|
||||
if (!blkcg_policy_enabled(q, pol))
|
||||
@ -1510,6 +1542,7 @@ void blkcg_deactivate_policy(struct request_queue *q,
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_freeze_queue(q);
|
||||
|
||||
mutex_lock(&q->blkcg_mutex);
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
|
||||
__clear_bit(pol->plid, q->blkcg_pols);
|
||||
@ -1519,7 +1552,7 @@ void blkcg_deactivate_policy(struct request_queue *q,
|
||||
|
||||
spin_lock(&blkcg->lock);
|
||||
if (blkg->pd[pol->plid]) {
|
||||
if (pol->pd_offline_fn)
|
||||
if (blkg->pd[pol->plid]->online && pol->pd_offline_fn)
|
||||
pol->pd_offline_fn(blkg->pd[pol->plid]);
|
||||
pol->pd_free_fn(blkg->pd[pol->plid]);
|
||||
blkg->pd[pol->plid] = NULL;
|
||||
@ -1528,6 +1561,7 @@ void blkcg_deactivate_policy(struct request_queue *q,
|
||||
}
|
||||
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
mutex_unlock(&q->blkcg_mutex);
|
||||
|
||||
if (queue_is_mq(q))
|
||||
blk_mq_unfreeze_queue(q);
|
||||
@ -1797,29 +1831,29 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
|
||||
*
|
||||
* This is only called if we've been marked with set_notify_resume(). Obviously
|
||||
* we can be set_notify_resume() for reasons other than blkcg throttling, so we
|
||||
* check to see if current->throttle_queue is set and if not this doesn't do
|
||||
* check to see if current->throttle_disk is set and if not this doesn't do
|
||||
* anything. This should only ever be called by the resume code, it's not meant
|
||||
* to be called by people willy-nilly as it will actually do the work to
|
||||
* throttle the task if it is setup for throttling.
|
||||
*/
|
||||
void blkcg_maybe_throttle_current(void)
|
||||
{
|
||||
struct request_queue *q = current->throttle_queue;
|
||||
struct gendisk *disk = current->throttle_disk;
|
||||
struct blkcg *blkcg;
|
||||
struct blkcg_gq *blkg;
|
||||
bool use_memdelay = current->use_memdelay;
|
||||
|
||||
if (!q)
|
||||
if (!disk)
|
||||
return;
|
||||
|
||||
current->throttle_queue = NULL;
|
||||
current->throttle_disk = NULL;
|
||||
current->use_memdelay = false;
|
||||
|
||||
rcu_read_lock();
|
||||
blkcg = css_to_blkcg(blkcg_css());
|
||||
if (!blkcg)
|
||||
goto out;
|
||||
blkg = blkg_lookup(blkcg, q);
|
||||
blkg = blkg_lookup(blkcg, disk->queue);
|
||||
if (!blkg)
|
||||
goto out;
|
||||
if (!blkg_tryget(blkg))
|
||||
@ -1828,11 +1862,10 @@ void blkcg_maybe_throttle_current(void)
|
||||
|
||||
blkcg_maybe_throttle_blkg(blkg, use_memdelay);
|
||||
blkg_put(blkg);
|
||||
blk_put_queue(q);
|
||||
put_disk(disk);
|
||||
return;
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
blk_put_queue(q);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1854,18 +1887,17 @@ out:
|
||||
*/
|
||||
void blkcg_schedule_throttle(struct gendisk *disk, bool use_memdelay)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
|
||||
if (unlikely(current->flags & PF_KTHREAD))
|
||||
return;
|
||||
|
||||
if (current->throttle_queue != q) {
|
||||
if (!blk_get_queue(q))
|
||||
if (current->throttle_disk != disk) {
|
||||
if (test_bit(GD_DEAD, &disk->state))
|
||||
return;
|
||||
get_device(disk_to_dev(disk));
|
||||
|
||||
if (current->throttle_queue)
|
||||
blk_put_queue(current->throttle_queue);
|
||||
current->throttle_queue = q;
|
||||
if (current->throttle_disk)
|
||||
put_disk(current->throttle_disk);
|
||||
current->throttle_disk = disk;
|
||||
}
|
||||
|
||||
if (use_memdelay)
|
||||
|
@ -135,6 +135,7 @@ struct blkg_policy_data {
|
||||
/* the blkg and policy id this per-policy data belongs to */
|
||||
struct blkcg_gq *blkg;
|
||||
int plid;
|
||||
bool online;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -154,8 +155,8 @@ typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
|
||||
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
|
||||
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
|
||||
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
|
||||
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
|
||||
struct request_queue *q, struct blkcg *blkcg);
|
||||
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(struct gendisk *disk,
|
||||
struct blkcg *blkcg, gfp_t gfp);
|
||||
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
|
||||
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
|
||||
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
|
||||
@ -194,9 +195,8 @@ void blkcg_exit_disk(struct gendisk *disk);
|
||||
/* Blkio controller policy registration */
|
||||
int blkcg_policy_register(struct blkcg_policy *pol);
|
||||
void blkcg_policy_unregister(struct blkcg_policy *pol);
|
||||
int blkcg_activate_policy(struct request_queue *q,
|
||||
const struct blkcg_policy *pol);
|
||||
void blkcg_deactivate_policy(struct request_queue *q,
|
||||
int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol);
|
||||
void blkcg_deactivate_policy(struct gendisk *disk,
|
||||
const struct blkcg_policy *pol);
|
||||
|
||||
const char *blkg_dev_name(struct blkcg_gq *blkg);
|
||||
@ -495,9 +495,9 @@ static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
|
||||
static inline void blkcg_exit_disk(struct gendisk *disk) { }
|
||||
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
|
||||
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
|
||||
static inline int blkcg_activate_policy(struct request_queue *q,
|
||||
static inline int blkcg_activate_policy(struct gendisk *disk,
|
||||
const struct blkcg_policy *pol) { return 0; }
|
||||
static inline void blkcg_deactivate_policy(struct request_queue *q,
|
||||
static inline void blkcg_deactivate_policy(struct gendisk *disk,
|
||||
const struct blkcg_policy *pol) { }
|
||||
|
||||
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
|
||||
|
@ -570,7 +570,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
|
||||
return BLK_STS_NOTSUPP;
|
||||
|
||||
/* The bio sector must point to the start of a sequential zone */
|
||||
if (bio->bi_iter.bi_sector & (bdev_zone_sectors(bio->bi_bdev) - 1) ||
|
||||
if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector) ||
|
||||
!bio_zone_is_seq(bio))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
@ -684,6 +684,18 @@ static void __submit_bio_noacct_mq(struct bio *bio)
|
||||
|
||||
void submit_bio_noacct_nocheck(struct bio *bio)
|
||||
{
|
||||
blk_cgroup_bio_start(bio);
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
||||
if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_queue(bio);
|
||||
/*
|
||||
* Now that enqueuing has been traced, we need to trace
|
||||
* completion as well.
|
||||
*/
|
||||
bio_set_flag(bio, BIO_TRACE_COMPLETION);
|
||||
}
|
||||
|
||||
/*
|
||||
* We only want one ->submit_bio to be active at a time, else stack
|
||||
* usage with stacked devices could be a problem. Use current->bio_list
|
||||
@ -788,17 +800,6 @@ void submit_bio_noacct(struct bio *bio)
|
||||
|
||||
if (blk_throtl_bio(bio))
|
||||
return;
|
||||
|
||||
blk_cgroup_bio_start(bio);
|
||||
blkcg_bio_issue_init(bio);
|
||||
|
||||
if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_queue(bio);
|
||||
/* Now that enqueuing has been traced, we need to trace
|
||||
* completion as well.
|
||||
*/
|
||||
bio_set_flag(bio, BIO_TRACE_COMPLETION);
|
||||
}
|
||||
submit_bio_noacct_nocheck(bio);
|
||||
return;
|
||||
|
||||
@ -869,7 +870,16 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
|
||||
*/
|
||||
blk_flush_plug(current->plug, false);
|
||||
|
||||
if (bio_queue_enter(bio))
|
||||
/*
|
||||
* We need to be able to enter a frozen queue, similar to how
|
||||
* timeouts also need to do that. If that is blocked, then we can
|
||||
* have pending IO when a queue freeze is started, and then the
|
||||
* wait for the freeze to finish will wait for polled requests to
|
||||
timeout as the poller is prevented from entering the queue and
|
||||
* completing them. As long as we prevent new IO from being queued,
|
||||
* that should be all that matters.
|
||||
*/
|
||||
if (!percpu_ref_tryget(&q->q_usage_counter))
|
||||
return 0;
|
||||
if (queue_is_mq(q)) {
|
||||
ret = blk_mq_poll(q, cookie, iob, flags);
|
||||
|
@ -116,7 +116,7 @@ static void blk_crypto_release(struct kobject *kobj)
|
||||
kfree(container_of(kobj, struct blk_crypto_kobj, kobj));
|
||||
}
|
||||
|
||||
static struct kobj_type blk_crypto_ktype = {
|
||||
static const struct kobj_type blk_crypto_ktype = {
|
||||
.default_groups = blk_crypto_attr_groups,
|
||||
.sysfs_ops = &blk_crypto_attr_ops,
|
||||
.release = blk_crypto_release,
|
||||
|
@ -75,7 +75,7 @@ static void blk_ia_range_sysfs_nop_release(struct kobject *kobj)
|
||||
{
|
||||
}
|
||||
|
||||
static struct kobj_type blk_ia_range_ktype = {
|
||||
static const struct kobj_type blk_ia_range_ktype = {
|
||||
.sysfs_ops = &blk_ia_range_sysfs_ops,
|
||||
.default_groups = blk_ia_range_groups,
|
||||
.release = blk_ia_range_sysfs_nop_release,
|
||||
@ -94,7 +94,7 @@ static void blk_ia_ranges_sysfs_release(struct kobject *kobj)
|
||||
kfree(iars);
|
||||
}
|
||||
|
||||
static struct kobj_type blk_ia_ranges_ktype = {
|
||||
static const struct kobj_type blk_ia_ranges_ktype = {
|
||||
.release = blk_ia_ranges_sysfs_release,
|
||||
};
|
||||
|
||||
|
@ -356,7 +356,7 @@ static const struct sysfs_ops integrity_ops = {
|
||||
.store = &integrity_attr_store,
|
||||
};
|
||||
|
||||
static struct kobj_type integrity_ktype = {
|
||||
static const struct kobj_type integrity_ktype = {
|
||||
.default_groups = integrity_groups,
|
||||
.sysfs_ops = &integrity_ops,
|
||||
};
|
||||
|
@ -258,6 +258,11 @@ enum {
|
||||
VRATE_MIN = VTIME_PER_USEC * VRATE_MIN_PPM / MILLION,
|
||||
VRATE_CLAMP_ADJ_PCT = 4,
|
||||
|
||||
/* switch iff the conditions are met for longer than this */
|
||||
AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
|
||||
};
|
||||
|
||||
enum {
|
||||
/* if IOs end up waiting for requests, issue less */
|
||||
RQ_WAIT_BUSY_PCT = 5,
|
||||
|
||||
@ -296,9 +301,6 @@ enum {
|
||||
/* don't let cmds which take a very long time pin lagging for too long */
|
||||
MAX_LAGGING_PERIODS = 10,
|
||||
|
||||
/* switch iff the conditions are met for longer than this */
|
||||
AUTOP_CYCLE_NSEC = 10LLU * NSEC_PER_SEC,
|
||||
|
||||
/*
|
||||
* Count IO size in 4k pages. The 12bit shift helps keeping
|
||||
* size-proportional components of cost calculation in closer
|
||||
@ -667,7 +669,7 @@ static struct ioc *q_to_ioc(struct request_queue *q)
|
||||
|
||||
static const char __maybe_unused *ioc_name(struct ioc *ioc)
|
||||
{
|
||||
struct gendisk *disk = ioc->rqos.q->disk;
|
||||
struct gendisk *disk = ioc->rqos.disk;
|
||||
|
||||
if (!disk)
|
||||
return "<unknown>";
|
||||
@ -806,11 +808,11 @@ static int ioc_autop_idx(struct ioc *ioc)
|
||||
u64 now_ns;
|
||||
|
||||
/* rotational? */
|
||||
if (!blk_queue_nonrot(ioc->rqos.q))
|
||||
if (!blk_queue_nonrot(ioc->rqos.disk->queue))
|
||||
return AUTOP_HDD;
|
||||
|
||||
/* handle SATA SSDs w/ broken NCQ */
|
||||
if (blk_queue_depth(ioc->rqos.q) == 1)
|
||||
if (blk_queue_depth(ioc->rqos.disk->queue) == 1)
|
||||
return AUTOP_SSD_QD1;
|
||||
|
||||
/* use one of the normal ssd sets */
|
||||
@ -866,9 +868,14 @@ static void calc_lcoefs(u64 bps, u64 seqiops, u64 randiops,
|
||||
|
||||
*page = *seqio = *randio = 0;
|
||||
|
||||
if (bps)
|
||||
*page = DIV64_U64_ROUND_UP(VTIME_PER_SEC,
|
||||
DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE));
|
||||
if (bps) {
|
||||
u64 bps_pages = DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE);
|
||||
|
||||
if (bps_pages)
|
||||
*page = DIV64_U64_ROUND_UP(VTIME_PER_SEC, bps_pages);
|
||||
else
|
||||
*page = 1;
|
||||
}
|
||||
|
||||
if (seqiops) {
|
||||
v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, seqiops);
|
||||
@ -926,8 +933,8 @@ static bool ioc_refresh_params(struct ioc *ioc, bool force)
|
||||
|
||||
ioc->vrate_min = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MIN] *
|
||||
VTIME_PER_USEC, MILLION);
|
||||
ioc->vrate_max = div64_u64((u64)ioc->params.qos[QOS_MAX] *
|
||||
VTIME_PER_USEC, MILLION);
|
||||
ioc->vrate_max = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MAX] *
|
||||
VTIME_PER_USEC, MILLION);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -2642,7 +2649,7 @@ retry_lock:
|
||||
if (use_debt) {
|
||||
iocg_incur_debt(iocg, abs_cost, &now);
|
||||
if (iocg_kick_delay(iocg, &now))
|
||||
blkcg_schedule_throttle(rqos->q->disk,
|
||||
blkcg_schedule_throttle(rqos->disk,
|
||||
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
|
||||
iocg_unlock(iocg, ioc_locked, &flags);
|
||||
return;
|
||||
@ -2743,7 +2750,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
|
||||
if (likely(!list_empty(&iocg->active_list))) {
|
||||
iocg_incur_debt(iocg, abs_cost, &now);
|
||||
if (iocg_kick_delay(iocg, &now))
|
||||
blkcg_schedule_throttle(rqos->q->disk,
|
||||
blkcg_schedule_throttle(rqos->disk,
|
||||
(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
|
||||
} else {
|
||||
iocg_commit_bio(iocg, bio, abs_cost, cost);
|
||||
@ -2814,7 +2821,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
|
||||
{
|
||||
struct ioc *ioc = rqos_to_ioc(rqos);
|
||||
|
||||
blkcg_deactivate_policy(rqos->q, &blkcg_policy_iocost);
|
||||
blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iocost);
|
||||
|
||||
spin_lock_irq(&ioc->lock);
|
||||
ioc->running = IOC_STOP;
|
||||
@ -2825,7 +2832,7 @@ static void ioc_rqos_exit(struct rq_qos *rqos)
|
||||
kfree(ioc);
|
||||
}
|
||||
|
||||
static struct rq_qos_ops ioc_rqos_ops = {
|
||||
static const struct rq_qos_ops ioc_rqos_ops = {
|
||||
.throttle = ioc_rqos_throttle,
|
||||
.merge = ioc_rqos_merge,
|
||||
.done_bio = ioc_rqos_done_bio,
|
||||
@ -2836,9 +2843,7 @@ static struct rq_qos_ops ioc_rqos_ops = {
|
||||
|
||||
static int blk_iocost_init(struct gendisk *disk)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
struct ioc *ioc;
|
||||
struct rq_qos *rqos;
|
||||
int i, cpu, ret;
|
||||
|
||||
ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
|
||||
@ -2861,11 +2866,6 @@ static int blk_iocost_init(struct gendisk *disk)
|
||||
local64_set(&ccs->rq_wait_ns, 0);
|
||||
}
|
||||
|
||||
rqos = &ioc->rqos;
|
||||
rqos->id = RQ_QOS_COST;
|
||||
rqos->ops = &ioc_rqos_ops;
|
||||
rqos->q = q;
|
||||
|
||||
spin_lock_init(&ioc->lock);
|
||||
timer_setup(&ioc->timer, ioc_timer_fn, 0);
|
||||
INIT_LIST_HEAD(&ioc->active_iocgs);
|
||||
@ -2889,17 +2889,17 @@ static int blk_iocost_init(struct gendisk *disk)
|
||||
* called before policy activation completion, can't assume that the
|
||||
* target bio has an iocg associated and need to test for NULL iocg.
|
||||
*/
|
||||
ret = rq_qos_add(q, rqos);
|
||||
ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
|
||||
if (ret)
|
||||
goto err_free_ioc;
|
||||
|
||||
ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
|
||||
ret = blkcg_activate_policy(disk, &blkcg_policy_iocost);
|
||||
if (ret)
|
||||
goto err_del_qos;
|
||||
return 0;
|
||||
|
||||
err_del_qos:
|
||||
rq_qos_del(q, rqos);
|
||||
rq_qos_del(&ioc->rqos);
|
||||
err_free_ioc:
|
||||
free_percpu(ioc->pcpu_stat);
|
||||
kfree(ioc);
|
||||
@ -2923,13 +2923,14 @@ static void ioc_cpd_free(struct blkcg_policy_data *cpd)
|
||||
kfree(container_of(cpd, struct ioc_cgrp, cpd));
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q,
|
||||
struct blkcg *blkcg)
|
||||
static struct blkg_policy_data *ioc_pd_alloc(struct gendisk *disk,
|
||||
struct blkcg *blkcg, gfp_t gfp)
|
||||
{
|
||||
int levels = blkcg->css.cgroup->level + 1;
|
||||
struct ioc_gq *iocg;
|
||||
|
||||
iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp, q->node);
|
||||
iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp,
|
||||
disk->node_id);
|
||||
if (!iocg)
|
||||
return NULL;
|
||||
|
||||
@ -3129,6 +3130,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
if (!dname)
|
||||
return 0;
|
||||
|
||||
spin_lock_irq(&ioc->lock);
|
||||
seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
|
||||
dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
|
||||
ioc->params.qos[QOS_RPPM] / 10000,
|
||||
@ -3141,6 +3143,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
|
||||
ioc->params.qos[QOS_MIN] % 10000 / 100,
|
||||
ioc->params.qos[QOS_MAX] / 10000,
|
||||
ioc->params.qos[QOS_MAX] % 10000 / 100);
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3185,6 +3188,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
return PTR_ERR(bdev);
|
||||
|
||||
disk = bdev->bd_disk;
|
||||
if (!queue_is_mq(disk->queue)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ioc = q_to_ioc(disk->queue);
|
||||
if (!ioc) {
|
||||
ret = blk_iocost_init(disk);
|
||||
@ -3212,7 +3220,8 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
|
||||
switch (match_token(p, qos_ctrl_tokens, args)) {
|
||||
case QOS_ENABLE:
|
||||
match_u64(&args[0], &v);
|
||||
if (match_u64(&args[0], &v))
|
||||
goto einval;
|
||||
enable = v;
|
||||
continue;
|
||||
case QOS_CTRL:
|
||||
@ -3270,11 +3279,11 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
|
||||
blk_stat_enable_accounting(disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
|
||||
ioc->enabled = true;
|
||||
wbt_disable_default(disk->queue);
|
||||
wbt_disable_default(disk);
|
||||
} else {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
|
||||
ioc->enabled = false;
|
||||
wbt_enable_default(disk->queue);
|
||||
wbt_enable_default(disk);
|
||||
}
|
||||
|
||||
if (user) {
|
||||
@ -3314,12 +3323,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
|
||||
if (!dname)
|
||||
return 0;
|
||||
|
||||
spin_lock_irq(&ioc->lock);
|
||||
seq_printf(sf, "%s ctrl=%s model=linear "
|
||||
"rbps=%llu rseqiops=%llu rrandiops=%llu "
|
||||
"wbps=%llu wseqiops=%llu wrandiops=%llu\n",
|
||||
dname, ioc->user_cost_model ? "user" : "auto",
|
||||
u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
|
||||
u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
|
||||
spin_unlock_irq(&ioc->lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3364,6 +3375,11 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
|
||||
return PTR_ERR(bdev);
|
||||
|
||||
q = bdev_get_queue(bdev);
|
||||
if (!queue_is_mq(q)) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ioc = q_to_ioc(q);
|
||||
if (!ioc) {
|
||||
ret = blk_iocost_init(bdev->bd_disk);
|
||||
|
@@ -292,7 +292,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos,
unsigned use_delay = atomic_read(&lat_to_blkg(iolat)->use_delay);

if (use_delay)
blkcg_schedule_throttle(rqos->q->disk, use_memdelay);
blkcg_schedule_throttle(rqos->disk, use_memdelay);

/*
* To avoid priority inversions we want to just take a slot if we are
@@ -330,7 +330,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
struct child_latency_info *lat_info,
bool up)
{
unsigned long qd = blkiolat->rqos.q->nr_requests;
unsigned long qd = blkiolat->rqos.disk->queue->nr_requests;
unsigned long scale = scale_amount(qd, up);
unsigned long old = atomic_read(&lat_info->scale_cookie);
unsigned long max_scale = qd << 1;
@@ -372,7 +372,7 @@ static void scale_cookie_change(struct blk_iolatency *blkiolat,
*/
static void scale_change(struct iolatency_grp *iolat, bool up)
{
unsigned long qd = iolat->blkiolat->rqos.q->nr_requests;
unsigned long qd = iolat->blkiolat->rqos.disk->queue->nr_requests;
unsigned long scale = scale_amount(qd, up);
unsigned long old = iolat->max_depth;

@@ -646,11 +646,11 @@ static void blkcg_iolatency_exit(struct rq_qos *rqos)

timer_shutdown_sync(&blkiolat->timer);
flush_work(&blkiolat->enable_work);
blkcg_deactivate_policy(rqos->q, &blkcg_policy_iolatency);
blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iolatency);
kfree(blkiolat);
}

static struct rq_qos_ops blkcg_iolatency_ops = {
static const struct rq_qos_ops blkcg_iolatency_ops = {
.throttle = blkcg_iolatency_throttle,
.done_bio = blkcg_iolatency_done_bio,
.exit = blkcg_iolatency_exit,
@@ -665,7 +665,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)

rcu_read_lock();
blkg_for_each_descendant_pre(blkg, pos_css,
blkiolat->rqos.q->root_blkg) {
blkiolat->rqos.disk->queue->root_blkg) {
struct iolatency_grp *iolat;
struct child_latency_info *lat_info;
unsigned long flags;
@@ -749,32 +749,26 @@ static void blkiolatency_enable_work_fn(struct work_struct *work)
*/
enabled = atomic_read(&blkiolat->enable_cnt);
if (enabled != blkiolat->enabled) {
blk_mq_freeze_queue(blkiolat->rqos.q);
blk_mq_freeze_queue(blkiolat->rqos.disk->queue);
blkiolat->enabled = enabled;
blk_mq_unfreeze_queue(blkiolat->rqos.q);
blk_mq_unfreeze_queue(blkiolat->rqos.disk->queue);
}
}

int blk_iolatency_init(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct blk_iolatency *blkiolat;
struct rq_qos *rqos;
int ret;

blkiolat = kzalloc(sizeof(*blkiolat), GFP_KERNEL);
if (!blkiolat)
return -ENOMEM;

rqos = &blkiolat->rqos;
rqos->id = RQ_QOS_LATENCY;
rqos->ops = &blkcg_iolatency_ops;
rqos->q = q;

ret = rq_qos_add(q, rqos);
ret = rq_qos_add(&blkiolat->rqos, disk, RQ_QOS_LATENCY,
&blkcg_iolatency_ops);
if (ret)
goto err_free;
ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
ret = blkcg_activate_policy(disk, &blkcg_policy_iolatency);
if (ret)
goto err_qos_del;

@@ -784,7 +778,7 @@ int blk_iolatency_init(struct gendisk *disk)
return 0;

err_qos_del:
rq_qos_del(q, rqos);
rq_qos_del(&blkiolat->rqos);
err_free:
kfree(blkiolat);
return ret;
@@ -952,13 +946,12 @@ static void iolatency_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
iolat->max_depth, avg_lat, cur_win);
}

static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
struct request_queue *q,
struct blkcg *blkcg)
static struct blkg_policy_data *iolatency_pd_alloc(struct gendisk *disk,
struct blkcg *blkcg, gfp_t gfp)
{
struct iolatency_grp *iolat;

iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
iolat = kzalloc_node(sizeof(*iolat), gfp, disk->node_id);
if (!iolat)
return NULL;
iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),

@@ -116,7 +116,7 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
}

static struct blkg_policy_data *
ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg)
ioprio_alloc_pd(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp)
{
struct ioprio_blkg *ioprio_blkg;

@@ -204,12 +204,12 @@ void blkcg_set_ioprio(struct bio *bio)

void blk_ioprio_exit(struct gendisk *disk)
{
blkcg_deactivate_policy(disk->queue, &ioprio_policy);
blkcg_deactivate_policy(disk, &ioprio_policy);
}

int blk_ioprio_init(struct gendisk *disk)
{
return blkcg_activate_policy(disk->queue, &ioprio_policy);
return blkcg_activate_policy(disk, &ioprio_policy);
}

static int __init ioprio_init(void)
@ -247,10 +247,8 @@ static struct bio *blk_rq_map_bio_alloc(struct request *rq,
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
if (rq->cmd_flags & REQ_POLLED) {
|
||||
blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE;
|
||||
|
||||
bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask,
|
||||
if (rq->cmd_flags & REQ_ALLOC_CACHE) {
|
||||
bio = bio_alloc_bioset(NULL, nr_vecs, rq->cmd_flags, gfp_mask,
|
||||
&fs_bio_set);
|
||||
if (!bio)
|
||||
return NULL;
|
||||
|
@ -758,6 +758,33 @@ void blk_rq_set_mixed_merge(struct request *rq)
|
||||
rq->rq_flags |= RQF_MIXED_MERGE;
|
||||
}
|
||||
|
||||
static inline blk_opf_t bio_failfast(const struct bio *bio)
|
||||
{
|
||||
if (bio->bi_opf & REQ_RAHEAD)
|
||||
return REQ_FAILFAST_MASK;
|
||||
|
||||
return bio->bi_opf & REQ_FAILFAST_MASK;
|
||||
}
|
||||
|
||||
/*
|
||||
* After we are marked as MIXED_MERGE, any new RA bio has to be updated
|
||||
* as failfast, and request's failfast has to be updated in case of
|
||||
* front merge.
|
||||
*/
|
||||
static inline void blk_update_mixed_merge(struct request *req,
|
||||
struct bio *bio, bool front_merge)
|
||||
{
|
||||
if (req->rq_flags & RQF_MIXED_MERGE) {
|
||||
if (bio->bi_opf & REQ_RAHEAD)
|
||||
bio->bi_opf |= REQ_FAILFAST_MASK;
|
||||
|
||||
if (front_merge) {
|
||||
req->cmd_flags &= ~REQ_FAILFAST_MASK;
|
||||
req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void blk_account_io_merge_request(struct request *req)
|
||||
{
|
||||
if (blk_do_io_stat(req)) {
|
||||
@ -955,7 +982,7 @@ enum bio_merge_status {
|
||||
static enum bio_merge_status bio_attempt_back_merge(struct request *req,
|
||||
struct bio *bio, unsigned int nr_segs)
|
||||
{
|
||||
const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK;
|
||||
const blk_opf_t ff = bio_failfast(bio);
|
||||
|
||||
if (!ll_back_merge_fn(req, bio, nr_segs))
|
||||
return BIO_MERGE_FAILED;
|
||||
@ -966,6 +993,8 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req,
|
||||
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
|
||||
blk_rq_set_mixed_merge(req);
|
||||
|
||||
blk_update_mixed_merge(req, bio, false);
|
||||
|
||||
req->biotail->bi_next = bio;
|
||||
req->biotail = bio;
|
||||
req->__data_len += bio->bi_iter.bi_size;
|
||||
@ -979,7 +1008,7 @@ static enum bio_merge_status bio_attempt_back_merge(struct request *req,
|
||||
static enum bio_merge_status bio_attempt_front_merge(struct request *req,
|
||||
struct bio *bio, unsigned int nr_segs)
|
||||
{
|
||||
const blk_opf_t ff = bio->bi_opf & REQ_FAILFAST_MASK;
|
||||
const blk_opf_t ff = bio_failfast(bio);
|
||||
|
||||
if (!ll_front_merge_fn(req, bio, nr_segs))
|
||||
return BIO_MERGE_FAILED;
|
||||
@ -990,6 +1019,8 @@ static enum bio_merge_status bio_attempt_front_merge(struct request *req,
|
||||
if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
|
||||
blk_rq_set_mixed_merge(req);
|
||||
|
||||
blk_update_mixed_merge(req, bio, true);
|
||||
|
||||
bio->bi_next = req->bio;
|
||||
req->bio = bio;
|
||||
|
||||
|
@ -813,9 +813,9 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
|
||||
|
||||
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
|
||||
{
|
||||
lockdep_assert_held(&rqos->q->debugfs_mutex);
|
||||
lockdep_assert_held(&rqos->disk->queue->debugfs_mutex);
|
||||
|
||||
if (!rqos->q->debugfs_dir)
|
||||
if (!rqos->disk->queue->debugfs_dir)
|
||||
return;
|
||||
debugfs_remove_recursive(rqos->debugfs_dir);
|
||||
rqos->debugfs_dir = NULL;
|
||||
@ -823,7 +823,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
|
||||
|
||||
void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
|
||||
{
|
||||
struct request_queue *q = rqos->q;
|
||||
struct request_queue *q = rqos->disk->queue;
|
||||
const char *dir_name = rq_qos_id_to_name(rqos->id);
|
||||
|
||||
lockdep_assert_held(&q->debugfs_mutex);
|
||||
@ -835,9 +835,7 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
|
||||
q->rqos_debugfs_dir = debugfs_create_dir("rqos",
|
||||
q->debugfs_dir);
|
||||
|
||||
rqos->debugfs_dir = debugfs_create_dir(dir_name,
|
||||
rqos->q->rqos_debugfs_dir);
|
||||
|
||||
rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir);
|
||||
debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
|
||||
}
|
||||
|
||||
|
@ -19,8 +19,7 @@
|
||||
#include "blk-wbt.h"
|
||||
|
||||
/*
|
||||
* Mark a hardware queue as needing a restart. For shared queues, maintain
|
||||
* a count of how many hardware queues are marked for restart.
|
||||
* Mark a hardware queue as needing a restart.
|
||||
*/
|
||||
void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
|
||||
{
|
||||
@ -82,7 +81,7 @@ dispatch:
|
||||
/*
|
||||
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
||||
* its queue by itself in its completion handler, so we don't need to
|
||||
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
|
||||
* restart queue if .get_budget() fails to get the budget.
|
||||
*
|
||||
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
||||
* be run again. This is necessary to avoid starving flushes.
|
||||
@ -210,7 +209,7 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
/*
|
||||
* Only SCSI implements .get_budget and .put_budget, and SCSI restarts
|
||||
* its queue by itself in its completion handler, so we don't need to
|
||||
* restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
|
||||
* restart queue if .get_budget() fails to get the budget.
|
||||
*
|
||||
* Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
|
||||
* be run again. This is necessary to avoid starving flushes.
|
||||
|
@ -46,7 +46,6 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
|
||||
struct blk_mq_hw_ctx_sysfs_entry {
|
||||
struct attribute attr;
|
||||
ssize_t (*show)(struct blk_mq_hw_ctx *, char *);
|
||||
ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t);
|
||||
};
|
||||
|
||||
static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
|
||||
@ -70,28 +69,6 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
|
||||
return res;
|
||||
}
|
||||
|
||||
static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
|
||||
struct attribute *attr, const char *page,
|
||||
size_t length)
|
||||
{
|
||||
struct blk_mq_hw_ctx_sysfs_entry *entry;
|
||||
struct blk_mq_hw_ctx *hctx;
|
||||
struct request_queue *q;
|
||||
ssize_t res;
|
||||
|
||||
entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr);
|
||||
hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj);
|
||||
q = hctx->queue;
|
||||
|
||||
if (!entry->store)
|
||||
return -EIO;
|
||||
|
||||
mutex_lock(&q->sysfs_lock);
|
||||
res = entry->store(hctx, page, length);
|
||||
mutex_unlock(&q->sysfs_lock);
|
||||
return res;
|
||||
}
|
||||
|
||||
static ssize_t blk_mq_hw_sysfs_nr_tags_show(struct blk_mq_hw_ctx *hctx,
|
||||
char *page)
|
||||
{
|
||||
@ -150,18 +127,17 @@ ATTRIBUTE_GROUPS(default_hw_ctx);
|
||||
|
||||
static const struct sysfs_ops blk_mq_hw_sysfs_ops = {
|
||||
.show = blk_mq_hw_sysfs_show,
|
||||
.store = blk_mq_hw_sysfs_store,
|
||||
};
|
||||
|
||||
static struct kobj_type blk_mq_ktype = {
|
||||
static const struct kobj_type blk_mq_ktype = {
|
||||
.release = blk_mq_sysfs_release,
|
||||
};
|
||||
|
||||
static struct kobj_type blk_mq_ctx_ktype = {
|
||||
static const struct kobj_type blk_mq_ctx_ktype = {
|
||||
.release = blk_mq_ctx_sysfs_release,
|
||||
};
|
||||
|
||||
static struct kobj_type blk_mq_hw_ktype = {
|
||||
static const struct kobj_type blk_mq_hw_ktype = {
|
||||
.sysfs_ops = &blk_mq_hw_sysfs_ops,
|
||||
.default_groups = default_hw_ctx_groups,
|
||||
.release = blk_mq_hw_sysfs_release,
|
||||
|
block/blk-mq.c
@ -658,7 +658,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
|
||||
* allocator for this for the rare use case of a command tied to
|
||||
* a specific queue.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!(flags & (BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED))))
|
||||
if (WARN_ON_ONCE(!(flags & BLK_MQ_REQ_NOWAIT)) ||
|
||||
WARN_ON_ONCE(!(flags & BLK_MQ_REQ_RESERVED)))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (hctx_idx >= q->nr_hw_queues)
|
||||
@ -1825,12 +1826,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
|
||||
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq)
|
||||
{
|
||||
struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
|
||||
struct sbitmap_queue *sbq;
|
||||
struct wait_queue_head *wq;
|
||||
wait_queue_entry_t *wait;
|
||||
bool ret;
|
||||
|
||||
if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)) {
|
||||
if (!(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
|
||||
!(blk_mq_is_shared_tags(hctx->flags))) {
|
||||
blk_mq_sched_mark_restart_hctx(hctx);
|
||||
|
||||
/*
|
||||
@ -1848,6 +1850,10 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
|
||||
if (!list_empty_careful(&wait->entry))
|
||||
return false;
|
||||
|
||||
if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag))
|
||||
sbq = &hctx->tags->breserved_tags;
|
||||
else
|
||||
sbq = &hctx->tags->bitmap_tags;
|
||||
wq = &bt_wait_ptr(sbq, hctx)->wait;
|
||||
|
||||
spin_lock_irq(&wq->lock);
|
||||
@ -1917,16 +1923,6 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
|
||||
static void blk_mq_handle_dev_resource(struct request *rq,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct request *next =
|
||||
list_first_entry_or_null(list, struct request, queuelist);
|
||||
|
||||
/*
|
||||
* If an I/O scheduler has been configured and we got a driver tag for
|
||||
* the next request already, free it.
|
||||
*/
|
||||
if (next)
|
||||
blk_mq_put_driver_tag(next);
|
||||
|
||||
list_add(&rq->queuelist, list);
|
||||
__blk_mq_requeue_request(rq);
|
||||
}
|
||||
@ -2001,6 +1997,23 @@ static void blk_mq_release_budgets(struct request_queue *q,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* blk_mq_commit_rqs will notify driver using bd->last that there is no
|
||||
* more requests. (See comment in struct blk_mq_ops for commit_rqs for
|
||||
* details)
|
||||
* Attention, we should explicitly call this in unusual cases:
|
||||
* 1) did not queue everything initially scheduled to queue
|
||||
* 2) the last attempt to queue a request failed
|
||||
*/
|
||||
static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued,
|
||||
bool from_schedule)
|
||||
{
|
||||
if (hctx->queue->mq_ops->commit_rqs && queued) {
|
||||
trace_block_unplug(hctx->queue, queued, !from_schedule);
|
||||
hctx->queue->mq_ops->commit_rqs(hctx);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if we did some work AND can potentially do more.
|
||||
*/
|
||||
@ -2009,8 +2022,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
|
||||
{
|
||||
enum prep_dispatch prep;
|
||||
struct request_queue *q = hctx->queue;
|
||||
struct request *rq, *nxt;
|
||||
int errors, queued;
|
||||
struct request *rq;
|
||||
int queued;
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
LIST_HEAD(zone_list);
|
||||
bool needs_resource = false;
|
||||
@ -2021,7 +2034,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
|
||||
/*
|
||||
* Now process all the entries, sending them to the driver.
|
||||
*/
|
||||
errors = queued = 0;
|
||||
queued = 0;
|
||||
do {
|
||||
struct blk_mq_queue_data bd;
|
||||
|
||||
@ -2035,17 +2048,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
|
||||
list_del_init(&rq->queuelist);
|
||||
|
||||
bd.rq = rq;
|
||||
|
||||
/*
|
||||
* Flag last if we have no more requests, or if we have more
|
||||
* but can't assign a driver tag to it.
|
||||
*/
|
||||
if (list_empty(list))
|
||||
bd.last = true;
|
||||
else {
|
||||
nxt = list_first_entry(list, struct request, queuelist);
|
||||
bd.last = !blk_mq_get_driver_tag(nxt);
|
||||
}
|
||||
bd.last = list_empty(list);
|
||||
|
||||
/*
|
||||
* once the request is queued to lld, no need to cover the
|
||||
@ -2074,7 +2077,6 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
|
||||
needs_resource = true;
|
||||
break;
|
||||
default:
|
||||
errors++;
|
||||
blk_mq_end_request(rq, ret);
|
||||
}
|
||||
} while (!list_empty(list));
|
||||
@ -2085,9 +2087,9 @@ out:
|
||||
/* If we didn't flush the entire list, we could have told the driver
|
||||
* there was more coming, but that turned out to be a lie.
|
||||
*/
|
||||
if ((!list_empty(list) || errors || needs_resource ||
|
||||
ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued)
|
||||
q->mq_ops->commit_rqs(hctx);
|
||||
if (!list_empty(list) || ret != BLK_STS_OK)
|
||||
blk_mq_commit_rqs(hctx, queued, false);
|
||||
|
||||
/*
|
||||
* Any items that need requeuing? Stuff them into hctx->dispatch,
|
||||
* that is where we will continue on next queue run.
|
||||
@ -2096,7 +2098,8 @@ out:
|
||||
bool needs_restart;
|
||||
/* For non-shared tags, the RESTART check will suffice */
|
||||
bool no_tag = prep == PREP_DISPATCH_NO_TAG &&
|
||||
(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED);
|
||||
((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) ||
|
||||
blk_mq_is_shared_tags(hctx->flags));
|
||||
|
||||
if (nr_budgets)
|
||||
blk_mq_release_budgets(q, list);
|
||||
@ -2151,10 +2154,10 @@ out:
|
||||
|
||||
blk_mq_update_dispatch_busy(hctx, true);
|
||||
return false;
|
||||
} else
|
||||
blk_mq_update_dispatch_busy(hctx, false);
|
||||
}
|
||||
|
||||
return (queued + errors) != 0;
|
||||
blk_mq_update_dispatch_busy(hctx, false);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -2548,16 +2551,6 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
|
||||
spin_unlock(&ctx->lock);
|
||||
}
|
||||
|
||||
static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued,
|
||||
bool from_schedule)
|
||||
{
|
||||
if (hctx->queue->mq_ops->commit_rqs) {
|
||||
trace_block_unplug(hctx->queue, *queued, !from_schedule);
|
||||
hctx->queue->mq_ops->commit_rqs(hctx);
|
||||
}
|
||||
*queued = 0;
|
||||
}
|
||||
|
||||
static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
|
||||
unsigned int nr_segs)
|
||||
{
|
||||
@ -2681,20 +2674,21 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
|
||||
return __blk_mq_try_issue_directly(rq->mq_hctx, rq, true, last);
|
||||
}
|
||||
|
||||
static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
|
||||
static void blk_mq_plug_issue_direct(struct blk_plug *plug)
|
||||
{
|
||||
struct blk_mq_hw_ctx *hctx = NULL;
|
||||
struct request *rq;
|
||||
int queued = 0;
|
||||
int errors = 0;
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
while ((rq = rq_list_pop(&plug->mq_list))) {
|
||||
bool last = rq_list_empty(plug->mq_list);
|
||||
blk_status_t ret;
|
||||
|
||||
if (hctx != rq->mq_hctx) {
|
||||
if (hctx)
|
||||
blk_mq_commit_rqs(hctx, &queued, from_schedule);
|
||||
if (hctx) {
|
||||
blk_mq_commit_rqs(hctx, queued, false);
|
||||
queued = 0;
|
||||
}
|
||||
hctx = rq->mq_hctx;
|
||||
}
|
||||
|
||||
@ -2706,21 +2700,16 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_DEV_RESOURCE:
|
||||
blk_mq_request_bypass_insert(rq, false, true);
|
||||
blk_mq_commit_rqs(hctx, &queued, from_schedule);
|
||||
return;
|
||||
goto out;
|
||||
default:
|
||||
blk_mq_end_request(rq, ret);
|
||||
errors++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we didn't flush the entire list, we could have told the driver
|
||||
* there was more coming, but that turned out to be a lie.
|
||||
*/
|
||||
if (errors)
|
||||
blk_mq_commit_rqs(hctx, &queued, from_schedule);
|
||||
out:
|
||||
if (ret != BLK_STS_OK)
|
||||
blk_mq_commit_rqs(hctx, queued, false);
|
||||
}
|
||||
|
||||
static void __blk_mq_flush_plug_list(struct request_queue *q,
|
||||
@ -2791,7 +2780,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
|
||||
}
|
||||
|
||||
blk_mq_run_dispatch_ops(q,
|
||||
blk_mq_plug_issue_direct(plug, false));
|
||||
blk_mq_plug_issue_direct(plug));
|
||||
if (rq_list_empty(plug->mq_list))
|
||||
return;
|
||||
}
|
||||
@ -2805,36 +2794,32 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
|
||||
struct list_head *list)
|
||||
{
|
||||
int queued = 0;
|
||||
int errors = 0;
|
||||
blk_status_t ret = BLK_STS_OK;
|
||||
|
||||
while (!list_empty(list)) {
|
||||
blk_status_t ret;
|
||||
struct request *rq = list_first_entry(list, struct request,
|
||||
queuelist);
|
||||
|
||||
list_del_init(&rq->queuelist);
|
||||
ret = blk_mq_request_issue_directly(rq, list_empty(list));
|
||||
if (ret != BLK_STS_OK) {
|
||||
errors++;
|
||||
if (ret == BLK_STS_RESOURCE ||
|
||||
ret == BLK_STS_DEV_RESOURCE) {
|
||||
blk_mq_request_bypass_insert(rq, false,
|
||||
list_empty(list));
|
||||
break;
|
||||
}
|
||||
blk_mq_end_request(rq, ret);
|
||||
} else
|
||||
switch (ret) {
|
||||
case BLK_STS_OK:
|
||||
queued++;
|
||||
break;
|
||||
case BLK_STS_RESOURCE:
|
||||
case BLK_STS_DEV_RESOURCE:
|
||||
blk_mq_request_bypass_insert(rq, false,
|
||||
list_empty(list));
|
||||
goto out;
|
||||
default:
|
||||
blk_mq_end_request(rq, ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we didn't flush the entire list, we could have told
|
||||
* the driver there was more coming, but that turned out to
|
||||
* be a lie.
|
||||
*/
|
||||
if ((!list_empty(list) || errors) &&
|
||||
hctx->queue->mq_ops->commit_rqs && queued)
|
||||
hctx->queue->mq_ops->commit_rqs(hctx);
|
||||
out:
|
||||
if (ret != BLK_STS_OK)
|
||||
blk_mq_commit_rqs(hctx, queued, false);
|
||||
}
|
||||
|
||||
static bool blk_mq_attempt_bio_merge(struct request_queue *q,
|
||||
@ -2894,15 +2879,16 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
|
||||
|
||||
if (!plug)
|
||||
return NULL;
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
if (!rq || rq->q != q)
|
||||
return NULL;
|
||||
|
||||
if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
|
||||
*bio = NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rq = rq_list_peek(&plug->cached_rq);
|
||||
if (!rq || rq->q != q)
|
||||
return NULL;
|
||||
|
||||
type = blk_mq_get_hctx_type((*bio)->bi_opf);
|
||||
hctx_type = rq->mq_hctx->type;
|
||||
if (type != hctx_type &&
|
||||
|
@@ -294,3 +294,70 @@ void rq_qos_exit(struct request_queue *q)
rqos->ops->exit(rqos);
}
}

int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
const struct rq_qos_ops *ops)
{
struct request_queue *q = disk->queue;

rqos->disk = disk;
rqos->id = id;
rqos->ops = ops;

/*
* No IO can be in-flight when adding rqos, so freeze queue, which
* is fine since we only support rq_qos for blk-mq queue.
*
* Reuse ->queue_lock for protecting against other concurrent
* rq_qos adding/deleting
*/
blk_mq_freeze_queue(q);

spin_lock_irq(&q->queue_lock);
if (rq_qos_id(q, rqos->id))
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;
spin_unlock_irq(&q->queue_lock);

blk_mq_unfreeze_queue(q);

if (rqos->ops->debugfs_attrs) {
mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_register_rqos(rqos);
mutex_unlock(&q->debugfs_mutex);
}

return 0;
ebusy:
spin_unlock_irq(&q->queue_lock);
blk_mq_unfreeze_queue(q);
return -EBUSY;
}

void rq_qos_del(struct rq_qos *rqos)
{
struct request_queue *q = rqos->disk->queue;
struct rq_qos **cur;

/*
* See comment in rq_qos_add() about freezing queue & using
* ->queue_lock.
*/
blk_mq_freeze_queue(q);

spin_lock_irq(&q->queue_lock);
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
break;
}
}
spin_unlock_irq(&q->queue_lock);

blk_mq_unfreeze_queue(q);

mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_unregister_rqos(rqos);
mutex_unlock(&q->debugfs_mutex);
}

@@ -25,8 +25,8 @@ struct rq_wait {
};

struct rq_qos {
struct rq_qos_ops *ops;
struct request_queue *q;
const struct rq_qos_ops *ops;
struct gendisk *disk;
enum rq_qos_id id;
struct rq_qos *next;
#ifdef CONFIG_BLK_DEBUG_FS
@@ -85,65 +85,9 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
init_waitqueue_head(&rq_wait->wait);
}

static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
{
/*
* No IO can be in-flight when adding rqos, so freeze queue, which
* is fine since we only support rq_qos for blk-mq queue.
*
* Reuse ->queue_lock for protecting against other concurrent
* rq_qos adding/deleting
*/
blk_mq_freeze_queue(q);

spin_lock_irq(&q->queue_lock);
if (rq_qos_id(q, rqos->id))
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;
spin_unlock_irq(&q->queue_lock);

blk_mq_unfreeze_queue(q);

if (rqos->ops->debugfs_attrs) {
mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_register_rqos(rqos);
mutex_unlock(&q->debugfs_mutex);
}

return 0;
ebusy:
spin_unlock_irq(&q->queue_lock);
blk_mq_unfreeze_queue(q);
return -EBUSY;

}

static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
{
struct rq_qos **cur;

/*
* See comment in rq_qos_add() about freezing queue & using
* ->queue_lock.
*/
blk_mq_freeze_queue(q);

spin_lock_irq(&q->queue_lock);
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
break;
}
}
spin_unlock_irq(&q->queue_lock);

blk_mq_unfreeze_queue(q);

mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_unregister_rqos(rqos);
mutex_unlock(&q->debugfs_mutex);
}
int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
const struct rq_qos_ops *ops);
void rq_qos_del(struct rq_qos *rqos);

typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data);
@ -16,6 +16,7 @@
|
||||
#include <linux/dma-mapping.h>
|
||||
|
||||
#include "blk.h"
|
||||
#include "blk-rq-qos.h"
|
||||
#include "blk-wbt.h"
|
||||
|
||||
void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
|
||||
@ -40,7 +41,7 @@ void blk_set_default_limits(struct queue_limits *lim)
|
||||
lim->virt_boundary_mask = 0;
|
||||
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
|
||||
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
|
||||
lim->max_dev_sectors = 0;
|
||||
lim->max_user_sectors = lim->max_dev_sectors = 0;
|
||||
lim->chunk_sectors = 0;
|
||||
lim->max_write_zeroes_sectors = 0;
|
||||
lim->max_zone_append_sectors = 0;
|
||||
@ -135,7 +136,12 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
|
||||
limits->max_hw_sectors = max_hw_sectors;
|
||||
|
||||
max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors);
|
||||
max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS);
|
||||
|
||||
if (limits->max_user_sectors)
|
||||
max_sectors = min(max_sectors, limits->max_user_sectors);
|
||||
else
|
||||
max_sectors = min(max_sectors, BLK_DEF_MAX_SECTORS);
|
||||
|
||||
max_sectors = round_down(max_sectors,
|
||||
limits->logical_block_size >> SECTOR_SHIFT);
|
||||
limits->max_sectors = max_sectors;
|
||||
|
@ -58,7 +58,8 @@ void blk_stat_add(struct request *rq, u64 now)
|
||||
|
||||
value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
|
||||
|
||||
blk_throtl_stat_add(rq, value);
|
||||
if (req_op(rq) == REQ_OP_READ || req_op(rq) == REQ_OP_WRITE)
|
||||
blk_throtl_stat_add(rq, value);
|
||||
|
||||
rcu_read_lock();
|
||||
cpu = get_cpu();
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "blk-mq.h"
|
||||
#include "blk-mq-debugfs.h"
|
||||
#include "blk-mq-sched.h"
|
||||
#include "blk-rq-qos.h"
|
||||
#include "blk-wbt.h"
|
||||
#include "blk-cgroup.h"
|
||||
#include "blk-throttle.h"
|
||||
@ -239,19 +240,28 @@ static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
|
||||
static ssize_t
|
||||
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
|
||||
{
|
||||
unsigned long max_sectors_kb,
|
||||
unsigned long var;
|
||||
unsigned int max_sectors_kb,
|
||||
max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1,
|
||||
page_kb = 1 << (PAGE_SHIFT - 10);
|
||||
ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
|
||||
ssize_t ret = queue_var_store(&var, page, count);
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb, (unsigned long)
|
||||
max_sectors_kb = (unsigned int)var;
|
||||
max_hw_sectors_kb = min_not_zero(max_hw_sectors_kb,
|
||||
q->limits.max_dev_sectors >> 1);
|
||||
|
||||
if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
|
||||
return -EINVAL;
|
||||
if (max_sectors_kb == 0) {
|
||||
q->limits.max_user_sectors = 0;
|
||||
max_sectors_kb = min(max_hw_sectors_kb,
|
||||
BLK_DEF_MAX_SECTORS >> 1);
|
||||
} else {
|
||||
if (max_sectors_kb > max_hw_sectors_kb ||
|
||||
max_sectors_kb < page_kb)
|
||||
return -EINVAL;
|
||||
q->limits.max_user_sectors = max_sectors_kb << 1;
|
||||
}
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
q->limits.max_sectors = max_sectors_kb << 1;
|
||||
@ -491,7 +501,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
|
||||
|
||||
rqos = wbt_rq_qos(q);
|
||||
if (!rqos) {
|
||||
ret = wbt_init(q);
|
||||
ret = wbt_init(q->disk);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -755,7 +765,7 @@ static void blk_queue_release(struct kobject *kobj)
|
||||
/* nothing to do here, all data is associated with the parent gendisk */
|
||||
}
|
||||
|
||||
static struct kobj_type blk_queue_ktype = {
|
||||
static const struct kobj_type blk_queue_ktype = {
|
||||
.default_groups = blk_queue_attr_groups,
|
||||
.sysfs_ops = &queue_sysfs_ops,
|
||||
.release = blk_queue_release,
|
||||
@ -817,7 +827,7 @@ int blk_register_queue(struct gendisk *disk)
|
||||
goto out_elv_unregister;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
|
||||
wbt_enable_default(q);
|
||||
wbt_enable_default(disk);
|
||||
blk_throtl_register(disk);
|
||||
|
||||
/* Now everything is ready and send out KOBJ_ADD uevent */
|
||||
|
@ -335,14 +335,13 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq)
|
||||
timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
|
||||
}
|
||||
|
||||
static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
|
||||
struct request_queue *q,
|
||||
struct blkcg *blkcg)
|
||||
static struct blkg_policy_data *throtl_pd_alloc(struct gendisk *disk,
|
||||
struct blkcg *blkcg, gfp_t gfp)
|
||||
{
|
||||
struct throtl_grp *tg;
|
||||
int rw;
|
||||
|
||||
tg = kzalloc_node(sizeof(*tg), gfp, q->node);
|
||||
tg = kzalloc_node(sizeof(*tg), gfp, disk->node_id);
|
||||
if (!tg)
|
||||
return NULL;
|
||||
|
||||
@ -2395,7 +2394,7 @@ int blk_throtl_init(struct gendisk *disk)
|
||||
td->low_downgrade_time = jiffies;
|
||||
|
||||
/* activate policy */
|
||||
ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
|
||||
ret = blkcg_activate_policy(disk, &blkcg_policy_throtl);
|
||||
if (ret) {
|
||||
free_percpu(td->latency_buckets[READ]);
|
||||
free_percpu(td->latency_buckets[WRITE]);
|
||||
@ -2411,7 +2410,7 @@ void blk_throtl_exit(struct gendisk *disk)
|
||||
BUG_ON(!q->td);
|
||||
del_timer_sync(&q->td->service_queue.pending_timer);
|
||||
throtl_shutdown_wq(q);
|
||||
blkcg_deactivate_policy(q, &blkcg_policy_throtl);
|
||||
blkcg_deactivate_policy(disk, &blkcg_policy_throtl);
|
||||
free_percpu(q->td->latency_buckets[READ]);
|
||||
free_percpu(q->td->latency_buckets[WRITE]);
|
||||
kfree(q->td);
|
||||
|
block/blk-wbt.c
@ -25,6 +25,7 @@
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/swap.h>
|
||||
|
||||
#include "blk-stat.h"
|
||||
#include "blk-wbt.h"
|
||||
#include "blk-rq-qos.h"
|
||||
#include "elevator.h"
|
||||
@ -32,6 +33,72 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/wbt.h>
|
||||
|
||||
enum wbt_flags {
|
||||
WBT_TRACKED = 1, /* write, tracked for throttling */
|
||||
WBT_READ = 2, /* read */
|
||||
WBT_KSWAPD = 4, /* write, from kswapd */
|
||||
WBT_DISCARD = 8, /* discard */
|
||||
|
||||
WBT_NR_BITS = 4, /* number of bits */
|
||||
};
|
||||
|
||||
enum {
|
||||
WBT_RWQ_BG = 0,
|
||||
WBT_RWQ_KSWAPD,
|
||||
WBT_RWQ_DISCARD,
|
||||
WBT_NUM_RWQ,
|
||||
};
|
||||
|
||||
/*
|
||||
* If current state is WBT_STATE_ON/OFF_DEFAULT, it can be covered to any other
|
||||
* state, if current state is WBT_STATE_ON/OFF_MANUAL, it can only be covered
|
||||
* to WBT_STATE_OFF/ON_MANUAL.
|
||||
*/
|
||||
enum {
|
||||
WBT_STATE_ON_DEFAULT = 1, /* on by default */
|
||||
WBT_STATE_ON_MANUAL = 2, /* on manually by sysfs */
|
||||
WBT_STATE_OFF_DEFAULT = 3, /* off by default */
|
||||
WBT_STATE_OFF_MANUAL = 4, /* off manually by sysfs */
|
||||
};
|
||||
|
||||
struct rq_wb {
|
||||
/*
|
||||
* Settings that govern how we throttle
|
||||
*/
|
||||
unsigned int wb_background; /* background writeback */
|
||||
unsigned int wb_normal; /* normal writeback */
|
||||
|
||||
short enable_state; /* WBT_STATE_* */
|
||||
|
||||
/*
|
||||
* Number of consecutive periods where we don't have enough
|
||||
* information to make a firm scale up/down decision.
|
||||
*/
|
||||
unsigned int unknown_cnt;
|
||||
|
||||
u64 win_nsec; /* default window size */
|
||||
u64 cur_win_nsec; /* current window size */
|
||||
|
||||
struct blk_stat_callback *cb;
|
||||
|
||||
u64 sync_issue;
|
||||
void *sync_cookie;
|
||||
|
||||
unsigned int wc;
|
||||
|
||||
unsigned long last_issue; /* last non-throttled issue */
|
||||
unsigned long last_comp; /* last non-throttled comp */
|
||||
unsigned long min_lat_nsec;
|
||||
struct rq_qos rqos;
|
||||
struct rq_wait rq_wait[WBT_NUM_RWQ];
|
||||
struct rq_depth rq_depth;
|
||||
};
|
||||
|
||||
static inline struct rq_wb *RQWB(struct rq_qos *rqos)
|
||||
{
|
||||
return container_of(rqos, struct rq_wb, rqos);
|
||||
}
|
||||
|
||||
static inline void wbt_clear_state(struct request *rq)
|
||||
{
|
||||
rq->wbt_flags = 0;
|
||||
@ -98,7 +165,7 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
|
||||
*/
|
||||
static bool wb_recent_wait(struct rq_wb *rwb)
|
||||
{
|
||||
struct bdi_writeback *wb = &rwb->rqos.q->disk->bdi->wb;
|
||||
struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
|
||||
|
||||
return time_before(jiffies, wb->dirty_sleep + HZ);
|
||||
}
|
||||
@ -226,6 +293,16 @@ static u64 rwb_sync_issue_lat(struct rq_wb *rwb)
|
||||
return now - issue;
|
||||
}
|
||||
|
||||
static inline unsigned int wbt_inflight(struct rq_wb *rwb)
|
||||
{
|
||||
unsigned int i, ret = 0;
|
||||
|
||||
for (i = 0; i < WBT_NUM_RWQ; i++)
|
||||
ret += atomic_read(&rwb->rq_wait[i].inflight);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
enum {
|
||||
LAT_OK = 1,
|
||||
LAT_UNKNOWN,
|
||||
@ -235,7 +312,7 @@ enum {
|
||||
|
||||
static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
|
||||
{
|
||||
struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
|
||||
struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
|
||||
struct rq_depth *rqd = &rwb->rq_depth;
|
||||
u64 thislat;
|
||||
|
||||
@ -288,7 +365,7 @@ static int latency_exceeded(struct rq_wb *rwb, struct blk_rq_stat *stat)
|
||||
|
||||
static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
|
||||
{
|
||||
struct backing_dev_info *bdi = rwb->rqos.q->disk->bdi;
|
||||
struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
|
||||
struct rq_depth *rqd = &rwb->rq_depth;
|
||||
|
||||
trace_wbt_step(bdi, msg, rqd->scale_step, rwb->cur_win_nsec,
|
||||
@ -358,13 +435,12 @@ static void wb_timer_fn(struct blk_stat_callback *cb)
|
||||
unsigned int inflight = wbt_inflight(rwb);
|
||||
int status;
|
||||
|
||||
if (!rwb->rqos.q->disk)
|
||||
if (!rwb->rqos.disk)
|
||||
return;
|
||||
|
||||
status = latency_exceeded(rwb, cb->stat);
|
||||
|
||||
trace_wbt_timer(rwb->rqos.q->disk->bdi, status, rqd->scale_step,
|
||||
inflight);
|
||||
trace_wbt_timer(rwb->rqos.disk->bdi, status, rqd->scale_step, inflight);
|
||||
|
||||
/*
|
||||
* If we exceeded the latency target, step down. If we did not,
|
||||
@ -650,8 +726,9 @@ void wbt_set_write_cache(struct request_queue *q, bool write_cache_on)
|
||||
/*
|
||||
* Enable wbt if defaults are configured that way
|
||||
*/
|
||||
void wbt_enable_default(struct request_queue *q)
|
||||
void wbt_enable_default(struct gendisk *disk)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
struct rq_qos *rqos;
|
||||
bool disable_flag = q->elevator &&
|
||||
test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
|
||||
@ -670,7 +747,7 @@ void wbt_enable_default(struct request_queue *q)
|
||||
return;
|
||||
|
||||
if (queue_is_mq(q) && !disable_flag)
|
||||
wbt_init(q);
|
||||
wbt_init(disk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(wbt_enable_default);
|
||||
|
||||
@ -701,16 +778,15 @@ static int wbt_data_dir(const struct request *rq)
|
||||
|
||||
static void wbt_queue_depth_changed(struct rq_qos *rqos)
|
||||
{
|
||||
RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->q);
|
||||
RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
|
||||
wbt_update_limits(RQWB(rqos));
|
||||
}
|
||||
|
||||
static void wbt_exit(struct rq_qos *rqos)
|
||||
{
|
||||
struct rq_wb *rwb = RQWB(rqos);
|
||||
struct request_queue *q = rqos->q;
|
||||
|
||||
blk_stat_remove_callback(q, rwb->cb);
|
||||
blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
|
||||
blk_stat_free_callback(rwb->cb);
|
||||
kfree(rwb);
|
||||
}
|
||||
@ -718,9 +794,9 @@ static void wbt_exit(struct rq_qos *rqos)
|
||||
/*
|
||||
* Disable wbt, if enabled by default.
|
||||
*/
|
||||
void wbt_disable_default(struct request_queue *q)
|
||||
void wbt_disable_default(struct gendisk *disk)
|
||||
{
|
||||
struct rq_qos *rqos = wbt_rq_qos(q);
|
||||
struct rq_qos *rqos = wbt_rq_qos(disk->queue);
|
||||
struct rq_wb *rwb;
|
||||
if (!rqos)
|
||||
return;
|
||||
@ -820,7 +896,7 @@ static const struct blk_mq_debugfs_attr wbt_debugfs_attrs[] = {
|
||||
};
|
||||
#endif
|
||||
|
||||
static struct rq_qos_ops wbt_rqos_ops = {
|
||||
static const struct rq_qos_ops wbt_rqos_ops = {
|
||||
.throttle = wbt_wait,
|
||||
.issue = wbt_issue,
|
||||
.track = wbt_track,
|
||||
@ -834,8 +910,9 @@ static struct rq_qos_ops wbt_rqos_ops = {
|
||||
#endif
|
||||
};
|
||||
|
||||
int wbt_init(struct request_queue *q)
|
||||
int wbt_init(struct gendisk *disk)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
struct rq_wb *rwb;
|
||||
int i;
|
||||
int ret;
|
||||
@ -853,22 +930,19 @@ int wbt_init(struct request_queue *q)
|
||||
for (i = 0; i < WBT_NUM_RWQ; i++)
|
||||
rq_wait_init(&rwb->rq_wait[i]);
|
||||
|
||||
rwb->rqos.id = RQ_QOS_WBT;
|
||||
rwb->rqos.ops = &wbt_rqos_ops;
|
||||
rwb->rqos.q = q;
|
||||
rwb->last_comp = rwb->last_issue = jiffies;
|
||||
rwb->win_nsec = RWB_WINDOW_NSEC;
|
||||
rwb->enable_state = WBT_STATE_ON_DEFAULT;
|
||||
rwb->wc = test_bit(QUEUE_FLAG_WC, &q->queue_flags);
|
||||
rwb->rq_depth.default_depth = RWB_DEF_DEPTH;
|
||||
rwb->min_lat_nsec = wbt_default_latency_nsec(q);
|
||||
|
||||
wbt_queue_depth_changed(&rwb->rqos);
|
||||
rwb->rq_depth.queue_depth = blk_queue_depth(q);
|
||||
wbt_update_limits(rwb);
|
||||
|
||||
/*
|
||||
* Assign rwb and add the stats callback.
|
||||
*/
|
||||
ret = rq_qos_add(q, &rwb->rqos);
|
||||
ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
|
||||
if (ret)
|
||||
goto err_free;
|
||||
|
||||
|
@ -2,97 +2,11 @@
|
||||
#ifndef WB_THROTTLE_H
|
||||
#define WB_THROTTLE_H
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/ktime.h>
|
||||
|
||||
#include "blk-stat.h"
|
||||
#include "blk-rq-qos.h"
|
||||
|
||||
enum wbt_flags {
|
||||
WBT_TRACKED = 1, /* write, tracked for throttling */
|
||||
WBT_READ = 2, /* read */
|
||||
WBT_KSWAPD = 4, /* write, from kswapd */
|
||||
WBT_DISCARD = 8, /* discard */
|
||||
|
||||
WBT_NR_BITS = 4, /* number of bits */
|
||||
};
|
||||
|
||||
enum {
|
||||
WBT_RWQ_BG = 0,
|
||||
WBT_RWQ_KSWAPD,
|
||||
WBT_RWQ_DISCARD,
|
||||
WBT_NUM_RWQ,
|
||||
};
|
||||
|
||||
/*
|
||||
* If current state is WBT_STATE_ON/OFF_DEFAULT, it can be covered to any other
|
||||
* state, if current state is WBT_STATE_ON/OFF_MANUAL, it can only be covered
|
||||
* to WBT_STATE_OFF/ON_MANUAL.
|
||||
*/
|
||||
enum {
|
||||
WBT_STATE_ON_DEFAULT = 1, /* on by default */
|
||||
WBT_STATE_ON_MANUAL = 2, /* on manually by sysfs */
|
||||
WBT_STATE_OFF_DEFAULT = 3, /* off by default */
|
||||
WBT_STATE_OFF_MANUAL = 4, /* off manually by sysfs */
|
||||
};
|
||||
|
||||
struct rq_wb {
|
||||
/*
|
||||
* Settings that govern how we throttle
|
||||
*/
|
||||
unsigned int wb_background; /* background writeback */
|
||||
unsigned int wb_normal; /* normal writeback */
|
||||
|
||||
short enable_state; /* WBT_STATE_* */
|
||||
|
||||
/*
|
||||
* Number of consecutive periods where we don't have enough
|
||||
* information to make a firm scale up/down decision.
|
||||
*/
|
||||
unsigned int unknown_cnt;
|
||||
|
||||
u64 win_nsec; /* default window size */
|
||||
u64 cur_win_nsec; /* current window size */
|
||||
|
||||
struct blk_stat_callback *cb;
|
||||
|
||||
u64 sync_issue;
|
||||
void *sync_cookie;
|
||||
|
||||
unsigned int wc;
|
||||
|
||||
unsigned long last_issue; /* last non-throttled issue */
|
||||
unsigned long last_comp; /* last non-throttled comp */
|
||||
unsigned long min_lat_nsec;
|
||||
struct rq_qos rqos;
|
||||
struct rq_wait rq_wait[WBT_NUM_RWQ];
|
||||
struct rq_depth rq_depth;
|
||||
};
|
||||
|
||||
static inline struct rq_wb *RQWB(struct rq_qos *rqos)
|
||||
{
|
||||
return container_of(rqos, struct rq_wb, rqos);
|
||||
}
|
||||
|
||||
static inline unsigned int wbt_inflight(struct rq_wb *rwb)
|
||||
{
|
||||
unsigned int i, ret = 0;
|
||||
|
||||
for (i = 0; i < WBT_NUM_RWQ; i++)
|
||||
ret += atomic_read(&rwb->rq_wait[i].inflight);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_BLK_WBT
|
||||
|
||||
int wbt_init(struct request_queue *);
|
||||
void wbt_disable_default(struct request_queue *);
|
||||
void wbt_enable_default(struct request_queue *);
|
||||
int wbt_init(struct gendisk *disk);
|
||||
void wbt_disable_default(struct gendisk *disk);
|
||||
void wbt_enable_default(struct gendisk *disk);
|
||||
|
||||
u64 wbt_get_min_lat(struct request_queue *q);
|
||||
void wbt_set_min_lat(struct request_queue *q, u64 val);
|
||||
@ -104,14 +18,14 @@ u64 wbt_default_latency_nsec(struct request_queue *);
|
||||
|
||||
#else
|
||||
|
||||
static inline int wbt_init(struct request_queue *q)
|
||||
static inline int wbt_init(struct gendisk *disk)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
static inline void wbt_disable_default(struct request_queue *q)
|
||||
static inline void wbt_disable_default(struct gendisk *disk)
|
||||
{
|
||||
}
|
||||
static inline void wbt_enable_default(struct request_queue *q)
|
||||
static inline void wbt_enable_default(struct gendisk *disk)
|
||||
{
|
||||
}
|
||||
static inline void wbt_set_write_cache(struct request_queue *q, bool wc)
|
||||
|
@@ -277,10 +277,10 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
return -EINVAL;

/* Check alignment (handle eventual smaller last zone) */
if (sector & (zone_sectors - 1))
if (!bdev_is_zone_start(bdev, sector))
return -EINVAL;

if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity)
return -EINVAL;

/*
@@ -126,7 +126,7 @@ static struct elevator_type *elevator_find_get(struct request_queue *q,
return e;
}

static struct kobj_type elv_ktype;
static const struct kobj_type elv_ktype;

struct elevator_queue *elevator_alloc(struct request_queue *q,
struct elevator_type *e)
@@ -455,7 +455,7 @@ static const struct sysfs_ops elv_sysfs_ops = {
.store = elv_attr_store,
};

static struct kobj_type elv_ktype = {
static const struct kobj_type elv_ktype = {
.sysfs_ops = &elv_sysfs_ops,
.release = elevator_release,
};
block/fops.c
@@ -221,6 +221,24 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
bio_endio(bio);
break;
}
if (iocb->ki_flags & IOCB_NOWAIT) {
/*
* This is nonblocking IO, and we need to allocate
* another bio if we have data left to map. As we
* cannot guarantee that one of the sub bios will not
* fail getting issued FOR NOWAIT and as error results
* are coalesced across all of them, be safe and ask for
* a retry of this from blocking context.
*/
if (unlikely(iov_iter_count(iter))) {
bio_release_pages(bio, false);
bio_clear_flag(bio, BIO_REFFED);
bio_put(bio);
blk_finish_plug(&plug);
return -EAGAIN;
}
bio->bi_opf |= REQ_NOWAIT;
}

if (is_read) {
if (dio->flags & DIO_SHOULD_DIRTY)
@@ -228,9 +246,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
} else {
task_io_account_write(bio->bi_iter.bi_size);
}
if (iocb->ki_flags & IOCB_NOWAIT)
bio->bi_opf |= REQ_NOWAIT;

dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;

@@ -1016,9 +1016,8 @@ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
static ssize_t disk_capability_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct gendisk *disk = dev_to_disk(dev);

return sprintf(buf, "%x\n", disk->flags);
dev_warn_once(dev, "the capability attribute has been deprecated.\n");
return sprintf(buf, "0\n");
}

static ssize_t disk_alignment_offset_show(struct device *dev,
@ -78,32 +78,25 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
|
||||
}
|
||||
|
||||
/*
|
||||
* Look up and return a brd's page for a given sector.
|
||||
* If one does not exist, allocate an empty page, and insert that. Then
|
||||
* return it.
|
||||
* Insert a new page for a given sector, if one does not already exist.
|
||||
*/
|
||||
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
|
||||
static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
|
||||
{
|
||||
pgoff_t idx;
|
||||
struct page *page;
|
||||
gfp_t gfp_flags;
|
||||
int ret = 0;
|
||||
|
||||
page = brd_lookup_page(brd, sector);
|
||||
if (page)
|
||||
return page;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Must use NOIO because we don't want to recurse back into the
|
||||
* block or filesystem layers from page reclaim.
|
||||
*/
|
||||
gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
|
||||
page = alloc_page(gfp_flags);
|
||||
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
|
||||
if (!page)
|
||||
return NULL;
|
||||
return -ENOMEM;
|
||||
|
||||
if (radix_tree_preload(GFP_NOIO)) {
|
||||
if (radix_tree_maybe_preload(gfp)) {
|
||||
__free_page(page);
|
||||
return NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
spin_lock(&brd->brd_lock);
|
||||
@ -112,16 +105,17 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
|
||||
if (radix_tree_insert(&brd->brd_pages, idx, page)) {
|
||||
__free_page(page);
|
||||
page = radix_tree_lookup(&brd->brd_pages, idx);
|
||||
BUG_ON(!page);
|
||||
BUG_ON(page->index != idx);
|
||||
if (!page)
|
||||
ret = -ENOMEM;
|
||||
else if (page->index != idx)
|
||||
ret = -EIO;
|
||||
} else {
|
||||
brd->brd_nr_pages++;
|
||||
}
|
||||
spin_unlock(&brd->brd_lock);
|
||||
|
||||
radix_tree_preload_end();
|
||||
|
||||
return page;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -170,20 +164,22 @@ static void brd_free_pages(struct brd_device *brd)
|
||||
/*
|
||||
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
|
||||
*/
|
||||
static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
|
||||
static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
|
||||
gfp_t gfp)
|
||||
{
|
||||
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
|
||||
size_t copy;
|
||||
int ret;
|
||||
|
||||
copy = min_t(size_t, n, PAGE_SIZE - offset);
|
||||
if (!brd_insert_page(brd, sector))
|
||||
return -ENOSPC;
|
||||
ret = brd_insert_page(brd, sector, gfp);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (copy < n) {
|
||||
sector += copy >> SECTOR_SHIFT;
|
||||
if (!brd_insert_page(brd, sector))
|
||||
return -ENOSPC;
|
||||
ret = brd_insert_page(brd, sector, gfp);
|
||||
}
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -256,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
|
||||
* Process a single bvec of a bio.
|
||||
*/
|
||||
static int brd_do_bvec(struct brd_device *brd, struct page *page,
|
||||
unsigned int len, unsigned int off, enum req_op op,
|
||||
unsigned int len, unsigned int off, blk_opf_t opf,
|
||||
sector_t sector)
|
||||
{
|
||||
void *mem;
|
||||
int err = 0;
|
||||
|
||||
if (op_is_write(op)) {
|
||||
err = copy_to_brd_setup(brd, sector, len);
|
||||
if (op_is_write(opf)) {
|
||||
/*
|
||||
* Must use NOIO because we don't want to recurse back into the
|
||||
* block or filesystem layers from page reclaim.
|
||||
*/
|
||||
gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
|
||||
|
||||
err = copy_to_brd_setup(brd, sector, len, gfp);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
mem = kmap_atomic(page);
|
||||
if (!op_is_write(op)) {
|
||||
if (!op_is_write(opf)) {
|
||||
copy_from_brd(mem + off, brd, sector, len);
|
||||
flush_dcache_page(page);
|
||||
} else {
|
||||
@ -298,8 +300,12 @@ static void brd_submit_bio(struct bio *bio)
|
||||
(len & (SECTOR_SIZE - 1)));
|
||||
|
||||
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
|
||||
bio_op(bio), sector);
|
||||
bio->bi_opf, sector);
|
||||
if (err) {
|
||||
if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
|
||||
bio_wouldblock_error(bio);
|
||||
return;
|
||||
}
|
||||
bio_io_error(bio);
|
||||
return;
|
||||
}
|
||||
@ -412,6 +418,7 @@ static int brd_alloc(int i)
|
||||
/* Tell the block layer that this is not a rotational device */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
|
||||
err = add_disk(disk);
|
||||
if (err)
|
||||
goto out_cleanup_disk;
|
||||
|
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
drbd-y := drbd_bitmap.o drbd_proc.o
drbd-y := drbd_buildtag.o drbd_bitmap.o drbd_proc.o
drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
drbd-y += drbd_interval.o drbd_state.o
drivers/block/drbd/drbd_buildtag.c (new file)
@@ -0,0 +1,22 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/drbd_config.h>
#include <linux/module.h>

const char *drbd_buildtag(void)
{
/* DRBD built from external sources has here a reference to the
* git hash of the source code.
*/

static char buildtag[38] = "\0uilt-in";

if (buildtag[0] == 0) {
#ifdef MODULE
sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
#else
buildtag[0] = 'b';
#endif
}

return buildtag;
}
@ -844,7 +844,7 @@ static int drbd_version_show(struct seq_file *m, void *ignored)
|
||||
{
|
||||
seq_printf(m, "# %s\n", drbd_buildtag());
|
||||
seq_printf(m, "VERSION=%s\n", REL_VERSION);
|
||||
seq_printf(m, "API_VERSION=%u\n", API_VERSION);
|
||||
seq_printf(m, "API_VERSION=%u\n", GENL_MAGIC_VERSION);
|
||||
seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN);
|
||||
seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX);
|
||||
return 0;
|
||||
|
@ -34,21 +34,12 @@
|
||||
#include <linux/prefetch.h>
|
||||
#include <linux/drbd_genl_api.h>
|
||||
#include <linux/drbd.h>
|
||||
#include <linux/drbd_config.h>
|
||||
#include "drbd_strings.h"
|
||||
#include "drbd_state.h"
|
||||
#include "drbd_protocol.h"
|
||||
#include "drbd_polymorph_printk.h"
|
||||
|
||||
#ifdef __CHECKER__
|
||||
# define __protected_by(x) __attribute__((require_context(x,1,999,"rdwr")))
|
||||
# define __protected_read_by(x) __attribute__((require_context(x,1,999,"read")))
|
||||
# define __protected_write_by(x) __attribute__((require_context(x,1,999,"write")))
|
||||
#else
|
||||
# define __protected_by(x)
|
||||
# define __protected_read_by(x)
|
||||
# define __protected_write_by(x)
|
||||
#endif
|
||||
|
||||
/* shared module parameters, defined in drbd_main.c */
|
||||
#ifdef CONFIG_DRBD_FAULT_INJECTION
|
||||
extern int drbd_enable_faults;
|
||||
@ -774,7 +765,7 @@ struct drbd_device {
|
||||
unsigned long flags;
|
||||
|
||||
/* configured by drbdsetup */
|
||||
struct drbd_backing_dev *ldev __protected_by(local);
|
||||
struct drbd_backing_dev *ldev;
|
||||
|
||||
sector_t p_size; /* partner's disk size */
|
||||
struct request_queue *rq_queue;
|
||||
|
@ -58,7 +58,7 @@ drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
|
||||
* drbd_contains_interval - check if a tree contains a given interval
|
||||
* @root: red black tree root
|
||||
* @sector: start sector of @interval
|
||||
* @interval: may not be a valid pointer
|
||||
* @interval: may be an invalid pointer
|
||||
*
|
||||
* Returns if the tree contains the node @interval with start sector @start.
|
||||
* Does not dereference @interval until @interval is known to be a valid object
|
||||
@ -95,6 +95,10 @@ drbd_contains_interval(struct rb_root *root, sector_t sector,
|
||||
void
|
||||
drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
|
||||
{
|
||||
/* avoid endless loop */
|
||||
if (drbd_interval_empty(this))
|
||||
return;
|
||||
|
||||
rb_erase_augmented(&this->rb, root, &augment_callbacks);
|
||||
}
|
||||
|
||||
|
@@ -2899,7 +2899,7 @@ static int __init drbd_init(void)

        pr_info("initialized. "
                "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
                API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
                GENL_MAGIC_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
        pr_info("%s\n", drbd_buildtag());
        pr_info("registered as block device major %d\n", DRBD_MAJOR);
        return 0; /* Success! */

@@ -3776,24 +3776,6 @@ _drbd_insert_fault(struct drbd_device *device, unsigned int type)
}
#endif

const char *drbd_buildtag(void)
{
        /* DRBD built from external sources has here a reference to the
           git hash of the source code. */

        static char buildtag[38] = "\0uilt-in";

        if (buildtag[0] == 0) {
#ifdef MODULE
                sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
#else
                buildtag[0] = 'b';
#endif
        }

        return buildtag;
}

module_init(drbd_init)
module_exit(drbd_cleanup)
@@ -228,7 +228,7 @@ int drbd_seq_show(struct seq_file *seq, void *v)
        };

        seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
                   API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());
                   GENL_MAGIC_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX, drbd_buildtag());

        /*
          cs .. connection state
@@ -327,7 +327,7 @@ static inline int bitstream_get_bits(struct bitstream *bs, u64 *out, int bits)
 */
static inline int vli_encode_bits(struct bitstream *bs, u64 in)
{
        u64 code = code;
        u64 code;
        int bits = __vli_encode_bits(&code, in);

        if (bits <= 0)
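The "u64 code = code;" line dropped above is the old self-initialization idiom for silencing "may be used uninitialized" warnings; it is unnecessary here because the value is only read after __vli_encode_bits() reports success, and the idiom itself trips -Winit-self on modern compilers. A stand-alone sketch of the same shape, not drbd code (compute() is a made-up stand-in for __vli_encode_bits()):

#include <stdio.h>

static int compute(int *out)
{
        *out = 42;      /* written on the success path */
        return 1;       /* >0 on success, mirroring __vli_encode_bits() */
}

int main(void)
{
        int code;       /* plain declaration is enough; no "code = code" needed */
        int bits = compute(&code);

        if (bits <= 0)
                return 1;
        printf("code=%d bits=%d\n", code, bits);
        return 0;
}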
@@ -90,7 +90,7 @@ struct loop_cmd {
};

#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
#define LOOP_DEFAULT_HW_Q_DEPTH (128)
#define LOOP_DEFAULT_HW_Q_DEPTH 128

static DEFINE_IDR(loop_index_idr);
static DEFINE_MUTEX(loop_ctl_mutex);

@@ -1792,9 +1792,15 @@ static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH;

static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p)
{
        int ret = kstrtoint(s, 10, &hw_queue_depth);
        int qd, ret;

        return (ret || (hw_queue_depth < 1)) ? -EINVAL : 0;
        ret = kstrtoint(s, 0, &qd);
        if (ret < 0)
                return ret;
        if (qd < 1)
                return -EINVAL;
        hw_queue_depth = qd;
        return 0;
}

static const struct kernel_param_ops loop_hw_qdepth_param_ops = {

@@ -1803,7 +1809,7 @@ static const struct kernel_param_ops loop_hw_qdepth_param_ops = {
};

device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 128");
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH));

MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
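The rewritten setter above parses into a local, validates it, and only then publishes the result, so a rejected write to the module parameter can no longer leave hw_queue_depth half-updated. A minimal sketch of the same kernel_param_ops wiring under invented names (my_queue_depth and my_qdepth_ops are illustrative, not part of the loop driver):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static int my_queue_depth = 128;

static int my_set_queue_depth(const char *s, const struct kernel_param *p)
{
        int qd, ret;

        ret = kstrtoint(s, 0, &qd);     /* base 0 accepts decimal, 0x... and 0... */
        if (ret < 0)
                return ret;
        if (qd < 1)
                return -EINVAL;
        my_queue_depth = qd;            /* publish only after validation */
        return 0;
}

static const struct kernel_param_ops my_qdepth_ops = {
        .set    = my_set_queue_depth,
        .get    = param_get_int,
};

module_param_cb(my_queue_depth, &my_qdepth_ops, &my_queue_depth, 0444);
MODULE_PARM_DESC(my_queue_depth, "Illustrative queue depth (default: 128)");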
@@ -2123,8 +2123,7 @@ static int null_add_dev(struct nullb_device *dev)
        blk_queue_physical_block_size(nullb->q, dev->blocksize);
        if (!dev->max_sectors)
                dev->max_sectors = queue_max_hw_sectors(nullb->q);
        dev->max_sectors = min_t(unsigned int, dev->max_sectors,
                                 BLK_DEF_MAX_SECTORS);
        dev->max_sectors = min(dev->max_sectors, BLK_DEF_MAX_SECTORS);
        blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);

        if (dev->virt_boundary)
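The min_t() to min() change above removes a cast that is only needed when the two operands have different types: the kernel's min() refuses to silently mix types, and min_t() exists to force both sides to one. A userspace sketch of the idea using local macros in the same spirit (MIN and MIN_T below are illustrations, not the kernel definitions, and rely on GNU statement expressions):

#include <stdio.h>

#define MIN(a, b) ({                            \
        typeof(a) _a = (a);                     \
        typeof(b) _b = (b);                     \
        (void)(&_a == &_b); /* warns if the types differ */ \
        _a < _b ? _a : _b; })

#define MIN_T(type, a, b) MIN((type)(a), (type)(b))

int main(void)
{
        unsigned int max_sectors = 4096;
        unsigned int def_max = 2560;    /* stand-in for BLK_DEF_MAX_SECTORS */

        /* Same type on both sides: plain MIN() is the clearer spelling. */
        printf("%u\n", MIN(max_sectors, def_max));
        /* Mixed types: MIN_T() forces a common one instead of warning. */
        printf("%u\n", MIN_T(unsigned int, max_sectors, 255L));
        return 0;
}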
@@ -586,10 +586,6 @@ static void ps3vram_submit_bio(struct bio *bio)

        dev_dbg(&dev->core, "%s\n", __func__);

        bio = bio_split_to_limits(bio);
        if (!bio)
                return;

        spin_lock_irq(&priv->lock);
        busy = !bio_list_empty(&priv->list);
        bio_list_add(&priv->list, bio);

@@ -749,9 +745,6 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
        gendisk->private_data = dev;
        strscpy(gendisk->disk_name, DEVICE_NAME, sizeof(gendisk->disk_name));
        set_capacity(gendisk, priv->size >> 9);
        blk_queue_max_segments(gendisk->queue, BLK_MAX_SEGMENTS);
        blk_queue_max_segment_size(gendisk->queue, BLK_MAX_SEGMENT_SIZE);
        blk_queue_max_hw_sectors(gendisk->queue, BLK_SAFE_MAX_SECTORS);

        dev_info(&dev->core, "%s: Using %llu MiB of GPU memory\n",
                 gendisk->disk_name, get_capacity(gendisk) >> 11);
@@ -3068,13 +3068,12 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)

        for (i = 0; i < obj_req->copyup_bvec_count; i++) {
                unsigned int len = min(obj_overlap, (u64)PAGE_SIZE);
                struct page *page = alloc_page(GFP_NOIO);

                obj_req->copyup_bvecs[i].bv_page = alloc_page(GFP_NOIO);
                if (!obj_req->copyup_bvecs[i].bv_page)
                if (!page)
                        return -ENOMEM;

                obj_req->copyup_bvecs[i].bv_offset = 0;
                obj_req->copyup_bvecs[i].bv_len = len;
                bvec_set_page(&obj_req->copyup_bvecs[i], page, len, 0);
                obj_overlap -= len;
        }
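This rbd hunk, like the virtio_blk and zram hunks further down, replaces open-coded bio_vec field assignments with the bvec initialization helpers introduced in this series. Roughly, as a simplified sketch rather than the authoritative <linux/bvec.h> definitions, the helpers behave like the demo_ versions below:

#include <linux/bvec.h>
#include <linux/mm.h>

/* Approximation of bvec_set_page(): fill a bio_vec from a page + len + offset. */
static inline void demo_bvec_set_page(struct bio_vec *bv, struct page *page,
                                      unsigned int len, unsigned int offset)
{
        bv->bv_page = page;
        bv->bv_len = len;
        bv->bv_offset = offset;
}

/* Approximation of bvec_set_virt(): same, but starting from a kernel virtual address. */
static inline void demo_bvec_set_virt(struct bio_vec *bv, void *vaddr,
                                      unsigned int len)
{
        demo_bvec_set_page(bv, virt_to_page(vaddr), len, offset_in_page(vaddr));
}

Callers such as the rbd loop above then make a single helper call instead of touching the three bio_vec fields by hand, which is what most of the bvec-helper conversions in this pull amount to.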
@ -42,6 +42,7 @@
|
||||
#include <linux/mm.h>
|
||||
#include <asm/page.h>
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/namei.h>
|
||||
#include <uapi/linux/ublk_cmd.h>
|
||||
|
||||
#define UBLK_MINORS (1U << MINORBITS)
|
||||
@ -51,10 +52,12 @@
|
||||
| UBLK_F_URING_CMD_COMP_IN_TASK \
|
||||
| UBLK_F_NEED_GET_DATA \
|
||||
| UBLK_F_USER_RECOVERY \
|
||||
| UBLK_F_USER_RECOVERY_REISSUE)
|
||||
| UBLK_F_USER_RECOVERY_REISSUE \
|
||||
| UBLK_F_UNPRIVILEGED_DEV)
|
||||
|
||||
/* All UBLK_PARAM_TYPE_* should be included here */
|
||||
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
|
||||
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
|
||||
UBLK_PARAM_TYPE_DISCARD | UBLK_PARAM_TYPE_DEVT)
|
||||
|
||||
struct ublk_rq_data {
|
||||
struct llist_node node;
|
||||
@ -147,6 +150,7 @@ struct ublk_device {
|
||||
|
||||
#define UB_STATE_OPEN 0
|
||||
#define UB_STATE_USED 1
|
||||
#define UB_STATE_DELETED 2
|
||||
unsigned long state;
|
||||
int ub_number;
|
||||
|
||||
@ -159,7 +163,7 @@ struct ublk_device {
|
||||
|
||||
struct completion completion;
|
||||
unsigned int nr_queues_ready;
|
||||
atomic_t nr_aborted_queues;
|
||||
unsigned int nr_privileged_daemon;
|
||||
|
||||
/*
|
||||
* Our ubq->daemon may be killed without any notification, so
|
||||
@ -185,6 +189,15 @@ static wait_queue_head_t ublk_idr_wq; /* wait until one idr is freed */
|
||||
|
||||
static DEFINE_MUTEX(ublk_ctl_mutex);
|
||||
|
||||
/*
|
||||
* Max ublk devices allowed to add
|
||||
*
|
||||
* It can be extended to one per-user limit in future or even controlled
|
||||
* by cgroup.
|
||||
*/
|
||||
static unsigned int ublks_max = 64;
|
||||
static unsigned int ublks_added; /* protected by ublk_ctl_mutex */
|
||||
|
||||
static struct miscdevice ublk_misc;
|
||||
|
||||
static void ublk_dev_param_basic_apply(struct ublk_device *ub)
|
||||
@ -255,6 +268,10 @@ static int ublk_validate_params(const struct ublk_device *ub)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* dev_t is read-only */
|
||||
if (ub->params.types & UBLK_PARAM_TYPE_DEVT)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -306,7 +323,7 @@ static inline struct ublk_queue *ublk_get_queue(struct ublk_device *dev,
|
||||
|
||||
static inline bool ublk_rq_has_data(const struct request *rq)
|
||||
{
|
||||
return rq->bio && bio_has_data(rq->bio);
|
||||
return bio_has_data(rq->bio);
|
||||
}
|
||||
|
||||
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
|
||||
@ -361,8 +378,50 @@ static void ublk_free_disk(struct gendisk *disk)
|
||||
put_device(&ub->cdev_dev);
|
||||
}
|
||||
|
||||
static void ublk_store_owner_uid_gid(unsigned int *owner_uid,
|
||||
unsigned int *owner_gid)
|
||||
{
|
||||
kuid_t uid;
|
||||
kgid_t gid;
|
||||
|
||||
current_uid_gid(&uid, &gid);
|
||||
|
||||
*owner_uid = from_kuid(&init_user_ns, uid);
|
||||
*owner_gid = from_kgid(&init_user_ns, gid);
|
||||
}
|
||||
|
||||
static int ublk_open(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
struct ublk_device *ub = bdev->bd_disk->private_data;
|
||||
|
||||
if (capable(CAP_SYS_ADMIN))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If it is one unprivileged device, only owner can open
|
||||
* the disk. Otherwise it could be one trap made by one
|
||||
* evil user who grants this disk's privileges to other
|
||||
* users deliberately.
|
||||
*
|
||||
* This way is reasonable too given anyone can create
|
||||
* unprivileged device, and no need other's grant.
|
||||
*/
|
||||
if (ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV) {
|
||||
unsigned int curr_uid, curr_gid;
|
||||
|
||||
ublk_store_owner_uid_gid(&curr_uid, &curr_gid);
|
||||
|
||||
if (curr_uid != ub->dev_info.owner_uid || curr_gid !=
|
||||
ub->dev_info.owner_gid)
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct block_device_operations ub_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = ublk_open,
|
||||
.free_disk = ublk_free_disk,
|
||||
};
|
||||
|
||||
@ -607,7 +666,7 @@ static void ublk_complete_rq(struct request *req)
|
||||
}
|
||||
|
||||
/*
|
||||
* FLUSH or DISCARD usually won't return bytes returned, so end them
|
||||
* FLUSH, DISCARD or WRITE_ZEROES usually won't return bytes returned, so end them
|
||||
* directly.
|
||||
*
|
||||
* Both the two needn't unmap.
|
||||
@ -1179,6 +1238,9 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
ubq->ubq_daemon = current;
|
||||
get_task_struct(ubq->ubq_daemon);
|
||||
ub->nr_queues_ready++;
|
||||
|
||||
if (capable(CAP_SYS_ADMIN))
|
||||
ub->nr_privileged_daemon++;
|
||||
}
|
||||
if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
|
||||
complete_all(&ub->completion);
|
||||
@ -1203,6 +1265,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
u32 cmd_op = cmd->cmd_op;
|
||||
unsigned tag = ub_cmd->tag;
|
||||
int ret = -EINVAL;
|
||||
struct request *req;
|
||||
|
||||
pr_devel("%s: received: cmd op %d queue %d tag %d result %d\n",
|
||||
__func__, cmd->cmd_op, ub_cmd->q_id, tag,
|
||||
@ -1253,8 +1316,8 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
*/
|
||||
if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
|
||||
goto out;
|
||||
/* FETCH_RQ has to provide IO buffer */
|
||||
if (!ub_cmd->addr)
|
||||
/* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */
|
||||
if (!ub_cmd->addr && !ublk_need_get_data(ubq))
|
||||
goto out;
|
||||
io->cmd = cmd;
|
||||
io->flags |= UBLK_IO_FLAG_ACTIVE;
|
||||
@ -1263,8 +1326,12 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
ublk_mark_io_ready(ub, ubq);
|
||||
break;
|
||||
case UBLK_IO_COMMIT_AND_FETCH_REQ:
|
||||
/* FETCH_RQ has to provide IO buffer */
|
||||
if (!ub_cmd->addr)
|
||||
req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
|
||||
/*
|
||||
* COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is
|
||||
* not enabled or it is Read IO.
|
||||
*/
|
||||
if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ))
|
||||
goto out;
|
||||
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
|
||||
goto out;
|
||||
@ -1433,6 +1500,8 @@ static int ublk_add_chdev(struct ublk_device *ub)
|
||||
ret = cdev_device_add(&ub->cdev, dev);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
ublks_added++;
|
||||
return 0;
|
||||
fail:
|
||||
put_device(dev);
|
||||
@ -1475,6 +1544,7 @@ static void ublk_remove(struct ublk_device *ub)
|
||||
cancel_work_sync(&ub->quiesce_work);
|
||||
cdev_device_del(&ub->cdev, &ub->cdev_dev);
|
||||
put_device(&ub->cdev_dev);
|
||||
ublks_added--;
|
||||
}
|
||||
|
||||
static struct ublk_device *ublk_get_device_from_id(int idx)
|
||||
@ -1493,21 +1563,16 @@ static struct ublk_device *ublk_get_device_from_id(int idx)
|
||||
return ub;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
int ublksrv_pid = (int)header->data[0];
|
||||
struct ublk_device *ub;
|
||||
struct gendisk *disk;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (ublksrv_pid <= 0)
|
||||
return -EINVAL;
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
wait_for_completion_interruptible(&ub->completion);
|
||||
|
||||
schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
|
||||
@ -1519,7 +1584,7 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
disk = blk_mq_alloc_disk(&ub->tag_set, ub);
|
||||
disk = blk_mq_alloc_disk(&ub->tag_set, NULL);
|
||||
if (IS_ERR(disk)) {
|
||||
ret = PTR_ERR(disk);
|
||||
goto out_unlock;
|
||||
@ -1535,6 +1600,10 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
|
||||
if (ret)
|
||||
goto out_put_disk;
|
||||
|
||||
/* don't probe partitions if any one ubq daemon is un-trusted */
|
||||
if (ub->nr_privileged_daemon != ub->nr_queues_ready)
|
||||
set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
|
||||
|
||||
get_device(&ub->cdev_dev);
|
||||
ret = add_disk(disk);
|
||||
if (ret) {
|
||||
@ -1552,21 +1621,20 @@ out_put_disk:
|
||||
put_disk(disk);
|
||||
out_unlock:
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_put_device(ub);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_get_queue_affinity(struct ublk_device *ub,
|
||||
struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_device *ub;
|
||||
cpumask_var_t cpumask;
|
||||
unsigned long queue;
|
||||
unsigned int retlen;
|
||||
unsigned int i;
|
||||
int ret = -EINVAL;
|
||||
|
||||
int ret;
|
||||
|
||||
if (header->len * BITS_PER_BYTE < nr_cpu_ids)
|
||||
return -EINVAL;
|
||||
if (header->len & (sizeof(unsigned long)-1))
|
||||
@ -1574,17 +1642,12 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd)
|
||||
if (!header->addr)
|
||||
return -EINVAL;
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
queue = header->data[0];
|
||||
if (queue >= ub->dev_info.nr_hw_queues)
|
||||
goto out_put_device;
|
||||
return -EINVAL;
|
||||
|
||||
ret = -ENOMEM;
|
||||
if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
|
||||
goto out_put_device;
|
||||
return -ENOMEM;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
if (ub->tag_set.map[HCTX_TYPE_DEFAULT].mq_map[i] == queue)
|
||||
@ -1602,8 +1665,6 @@ static int ublk_ctrl_get_queue_affinity(struct io_uring_cmd *cmd)
|
||||
ret = 0;
|
||||
out_free_cpumask:
|
||||
free_cpumask_var(cpumask);
|
||||
out_put_device:
|
||||
ublk_put_device(ub);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1630,19 +1691,34 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
||||
__func__, header->queue_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (copy_from_user(&info, argp, sizeof(info)))
|
||||
return -EFAULT;
|
||||
ublk_dump_dev_info(&info);
|
||||
|
||||
if (capable(CAP_SYS_ADMIN))
|
||||
info.flags &= ~UBLK_F_UNPRIVILEGED_DEV;
|
||||
else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
|
||||
return -EPERM;
|
||||
|
||||
/* the created device is always owned by current user */
|
||||
ublk_store_owner_uid_gid(&info.owner_uid, &info.owner_gid);
|
||||
|
||||
if (header->dev_id != info.dev_id) {
|
||||
pr_warn("%s: dev id not match %u %u\n",
|
||||
__func__, header->dev_id, info.dev_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ublk_dump_dev_info(&info);
|
||||
|
||||
ret = mutex_lock_killable(&ublk_ctl_mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = -EACCES;
|
||||
if (ublks_added >= ublks_max)
|
||||
goto out_unlock;
|
||||
|
||||
ret = -ENOMEM;
|
||||
ub = kzalloc(sizeof(*ub), GFP_KERNEL);
|
||||
if (!ub)
|
||||
@ -1724,33 +1800,43 @@ static inline bool ublk_idr_freed(int id)
|
||||
return ptr == NULL;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_del_dev(int idx)
|
||||
static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
|
||||
{
|
||||
struct ublk_device *ub;
|
||||
struct ublk_device *ub = *p_ub;
|
||||
int idx = ub->ub_number;
|
||||
int ret;
|
||||
|
||||
ret = mutex_lock_killable(&ublk_ctl_mutex);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ub = ublk_get_device_from_id(idx);
|
||||
if (ub) {
|
||||
if (!test_bit(UB_STATE_DELETED, &ub->state)) {
|
||||
ublk_remove(ub);
|
||||
ublk_put_device(ub);
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -ENODEV;
|
||||
set_bit(UB_STATE_DELETED, &ub->state);
|
||||
}
|
||||
|
||||
/* Mark the reference as consumed */
|
||||
*p_ub = NULL;
|
||||
ublk_put_device(ub);
|
||||
mutex_unlock(&ublk_ctl_mutex);
|
||||
|
||||
/*
|
||||
* Wait until the idr is removed, then it can be reused after
|
||||
* DEL_DEV command is returned.
|
||||
*
|
||||
* If we returns because of user interrupt, future delete command
|
||||
* may come:
|
||||
*
|
||||
* - the device number isn't freed, this device won't or needn't
|
||||
* be deleted again, since UB_STATE_DELETED is set, and device
|
||||
* will be released after the last reference is dropped
|
||||
*
|
||||
* - the device number is freed already, we will not find this
|
||||
* device via ublk_get_device_from_id()
|
||||
*/
|
||||
if (!ret)
|
||||
wait_event(ublk_idr_wq, ublk_idr_freed(idx));
|
||||
mutex_unlock(&ublk_ctl_mutex);
|
||||
wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx));
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
|
||||
@ -1762,50 +1848,52 @@ static inline void ublk_ctrl_cmd_dump(struct io_uring_cmd *cmd)
|
||||
header->data[0], header->addr, header->len);
|
||||
}
|
||||
|
||||
static int ublk_ctrl_stop_dev(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_stop_dev(struct ublk_device *ub)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
struct ublk_device *ub;
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
ublk_stop_dev(ub);
|
||||
cancel_work_sync(&ub->stop_work);
|
||||
cancel_work_sync(&ub->quiesce_work);
|
||||
|
||||
ublk_put_device(ub);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_get_dev_info(struct ublk_device *ub,
|
||||
struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_device *ub;
|
||||
int ret = 0;
|
||||
|
||||
if (header->len < sizeof(struct ublksrv_ctrl_dev_info) || !header->addr)
|
||||
return -EINVAL;
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_to_user(argp, &ub->dev_info, sizeof(ub->dev_info)))
|
||||
ret = -EFAULT;
|
||||
ublk_put_device(ub);
|
||||
return -EFAULT;
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
|
||||
/* TYPE_DEVT is readonly, so fill it up before returning to userspace */
|
||||
static void ublk_ctrl_fill_params_devt(struct ublk_device *ub)
|
||||
{
|
||||
ub->params.devt.char_major = MAJOR(ub->cdev_dev.devt);
|
||||
ub->params.devt.char_minor = MINOR(ub->cdev_dev.devt);
|
||||
|
||||
if (ub->ub_disk) {
|
||||
ub->params.devt.disk_major = MAJOR(disk_devt(ub->ub_disk));
|
||||
ub->params.devt.disk_minor = MINOR(disk_devt(ub->ub_disk));
|
||||
} else {
|
||||
ub->params.devt.disk_major = 0;
|
||||
ub->params.devt.disk_minor = 0;
|
||||
}
|
||||
ub->params.types |= UBLK_PARAM_TYPE_DEVT;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_get_params(struct ublk_device *ub,
|
||||
struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_params_header ph;
|
||||
struct ublk_device *ub;
|
||||
int ret;
|
||||
|
||||
if (header->len <= sizeof(ph) || !header->addr)
|
||||
@ -1820,27 +1908,23 @@ static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
|
||||
if (ph.len > sizeof(struct ublk_params))
|
||||
ph.len = sizeof(struct ublk_params);
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&ub->mutex);
|
||||
ublk_ctrl_fill_params_devt(ub);
|
||||
if (copy_to_user(argp, &ub->params, ph.len))
|
||||
ret = -EFAULT;
|
||||
else
|
||||
ret = 0;
|
||||
mutex_unlock(&ub->mutex);
|
||||
|
||||
ublk_put_device(ub);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_set_params(struct ublk_device *ub,
|
||||
struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
struct ublk_params_header ph;
|
||||
struct ublk_device *ub;
|
||||
int ret = -EFAULT;
|
||||
|
||||
if (header->len <= sizeof(ph) || !header->addr)
|
||||
@ -1855,10 +1939,6 @@ static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
|
||||
if (ph.len > sizeof(struct ublk_params))
|
||||
ph.len = sizeof(struct ublk_params);
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return -EINVAL;
|
||||
|
||||
/* parameters can only be changed when device isn't live */
|
||||
mutex_lock(&ub->mutex);
|
||||
if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
|
||||
@ -1871,7 +1951,6 @@ static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
|
||||
ret = ublk_validate_params(ub);
|
||||
}
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_put_device(ub);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1898,17 +1977,13 @@ static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
|
||||
}
|
||||
}
|
||||
|
||||
static int ublk_ctrl_start_recovery(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_start_recovery(struct ublk_device *ub,
|
||||
struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
struct ublk_device *ub;
|
||||
int ret = -EINVAL;
|
||||
int i;
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&ub->mutex);
|
||||
if (!ublk_can_use_recovery(ub))
|
||||
goto out_unlock;
|
||||
@ -1936,25 +2011,21 @@ static int ublk_ctrl_start_recovery(struct io_uring_cmd *cmd)
|
||||
/* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
|
||||
ub->mm = NULL;
|
||||
ub->nr_queues_ready = 0;
|
||||
ub->nr_privileged_daemon = 0;
|
||||
init_completion(&ub->completion);
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_put_device(ub);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_end_recovery(struct io_uring_cmd *cmd)
|
||||
static int ublk_ctrl_end_recovery(struct ublk_device *ub,
|
||||
struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
int ublksrv_pid = (int)header->data[0];
|
||||
struct ublk_device *ub;
|
||||
int ret = -EINVAL;
|
||||
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
return ret;
|
||||
|
||||
pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
|
||||
__func__, ub->dev_info.nr_hw_queues, header->dev_id);
|
||||
/* wait until new ubq_daemon sending all FETCH_REQ */
|
||||
@ -1982,7 +2053,115 @@ static int ublk_ctrl_end_recovery(struct io_uring_cmd *cmd)
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&ub->mutex);
|
||||
ublk_put_device(ub);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* All control commands are sent via /dev/ublk-control, so we have to check
|
||||
* the destination device's permission
|
||||
*/
|
||||
static int ublk_char_dev_permission(struct ublk_device *ub,
|
||||
const char *dev_path, int mask)
|
||||
{
|
||||
int err;
|
||||
struct path path;
|
||||
struct kstat stat;
|
||||
|
||||
err = kern_path(dev_path, LOOKUP_FOLLOW, &path);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT);
|
||||
if (err)
|
||||
goto exit;
|
||||
|
||||
err = -EPERM;
|
||||
if (stat.rdev != ub->cdev_dev.devt || !S_ISCHR(stat.mode))
|
||||
goto exit;
|
||||
|
||||
err = inode_permission(&nop_mnt_idmap,
|
||||
d_backing_inode(path.dentry), mask);
|
||||
exit:
|
||||
path_put(&path);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ublk_ctrl_uring_cmd_permission(struct ublk_device *ub,
|
||||
struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
bool unprivileged = ub->dev_info.flags & UBLK_F_UNPRIVILEGED_DEV;
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
char *dev_path = NULL;
|
||||
int ret = 0;
|
||||
int mask;
|
||||
|
||||
if (!unprivileged) {
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
/*
|
||||
* The new added command of UBLK_CMD_GET_DEV_INFO2 includes
|
||||
* char_dev_path in payload too, since userspace may not
|
||||
* know if the specified device is created as unprivileged
|
||||
* mode.
|
||||
*/
|
||||
if (cmd->cmd_op != UBLK_CMD_GET_DEV_INFO2)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* User has to provide the char device path for unprivileged ublk
|
||||
*
|
||||
* header->addr always points to the dev path buffer, and
|
||||
* header->dev_path_len records length of dev path buffer.
|
||||
*/
|
||||
if (!header->dev_path_len || header->dev_path_len > PATH_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (header->len < header->dev_path_len)
|
||||
return -EINVAL;
|
||||
|
||||
dev_path = kmalloc(header->dev_path_len + 1, GFP_KERNEL);
|
||||
if (!dev_path)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = -EFAULT;
|
||||
if (copy_from_user(dev_path, argp, header->dev_path_len))
|
||||
goto exit;
|
||||
dev_path[header->dev_path_len] = 0;
|
||||
|
||||
ret = -EINVAL;
|
||||
switch (cmd->cmd_op) {
|
||||
case UBLK_CMD_GET_DEV_INFO:
|
||||
case UBLK_CMD_GET_DEV_INFO2:
|
||||
case UBLK_CMD_GET_QUEUE_AFFINITY:
|
||||
case UBLK_CMD_GET_PARAMS:
|
||||
mask = MAY_READ;
|
||||
break;
|
||||
case UBLK_CMD_START_DEV:
|
||||
case UBLK_CMD_STOP_DEV:
|
||||
case UBLK_CMD_ADD_DEV:
|
||||
case UBLK_CMD_DEL_DEV:
|
||||
case UBLK_CMD_SET_PARAMS:
|
||||
case UBLK_CMD_START_USER_RECOVERY:
|
||||
case UBLK_CMD_END_USER_RECOVERY:
|
||||
mask = MAY_READ | MAY_WRITE;
|
||||
break;
|
||||
default:
|
||||
goto exit;
|
||||
}
|
||||
|
||||
ret = ublk_char_dev_permission(ub, dev_path, mask);
|
||||
if (!ret) {
|
||||
header->len -= header->dev_path_len;
|
||||
header->addr += header->dev_path_len;
|
||||
}
|
||||
pr_devel("%s: dev id %d cmd_op %x uid %d gid %d path %s ret %d\n",
|
||||
__func__, ub->ub_number, cmd->cmd_op,
|
||||
ub->dev_info.owner_uid, ub->dev_info.owner_gid,
|
||||
dev_path, ret);
|
||||
exit:
|
||||
kfree(dev_path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1990,6 +2169,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
|
||||
struct ublk_device *ub = NULL;
|
||||
int ret = -EINVAL;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
@ -2000,45 +2180,61 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
|
||||
if (!(issue_flags & IO_URING_F_SQE128))
|
||||
goto out;
|
||||
|
||||
ret = -EPERM;
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
goto out;
|
||||
if (cmd->cmd_op != UBLK_CMD_ADD_DEV) {
|
||||
ret = -ENODEV;
|
||||
ub = ublk_get_device_from_id(header->dev_id);
|
||||
if (!ub)
|
||||
goto out;
|
||||
|
||||
ret = ublk_ctrl_uring_cmd_permission(ub, cmd);
|
||||
} else {
|
||||
/* ADD_DEV permission check is done in command handler */
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (ret)
|
||||
goto put_dev;
|
||||
|
||||
ret = -ENODEV;
|
||||
switch (cmd->cmd_op) {
|
||||
case UBLK_CMD_START_DEV:
|
||||
ret = ublk_ctrl_start_dev(cmd);
|
||||
ret = ublk_ctrl_start_dev(ub, cmd);
|
||||
break;
|
||||
case UBLK_CMD_STOP_DEV:
|
||||
ret = ublk_ctrl_stop_dev(cmd);
|
||||
ret = ublk_ctrl_stop_dev(ub);
|
||||
break;
|
||||
case UBLK_CMD_GET_DEV_INFO:
|
||||
ret = ublk_ctrl_get_dev_info(cmd);
|
||||
case UBLK_CMD_GET_DEV_INFO2:
|
||||
ret = ublk_ctrl_get_dev_info(ub, cmd);
|
||||
break;
|
||||
case UBLK_CMD_ADD_DEV:
|
||||
ret = ublk_ctrl_add_dev(cmd);
|
||||
break;
|
||||
case UBLK_CMD_DEL_DEV:
|
||||
ret = ublk_ctrl_del_dev(header->dev_id);
|
||||
ret = ublk_ctrl_del_dev(&ub);
|
||||
break;
|
||||
case UBLK_CMD_GET_QUEUE_AFFINITY:
|
||||
ret = ublk_ctrl_get_queue_affinity(cmd);
|
||||
ret = ublk_ctrl_get_queue_affinity(ub, cmd);
|
||||
break;
|
||||
case UBLK_CMD_GET_PARAMS:
|
||||
ret = ublk_ctrl_get_params(cmd);
|
||||
ret = ublk_ctrl_get_params(ub, cmd);
|
||||
break;
|
||||
case UBLK_CMD_SET_PARAMS:
|
||||
ret = ublk_ctrl_set_params(cmd);
|
||||
ret = ublk_ctrl_set_params(ub, cmd);
|
||||
break;
|
||||
case UBLK_CMD_START_USER_RECOVERY:
|
||||
ret = ublk_ctrl_start_recovery(cmd);
|
||||
ret = ublk_ctrl_start_recovery(ub, cmd);
|
||||
break;
|
||||
case UBLK_CMD_END_USER_RECOVERY:
|
||||
ret = ublk_ctrl_end_recovery(cmd);
|
||||
ret = ublk_ctrl_end_recovery(ub, cmd);
|
||||
break;
|
||||
default:
|
||||
ret = -ENOTSUPP;
|
||||
break;
|
||||
}
|
||||
|
||||
put_dev:
|
||||
if (ub)
|
||||
ublk_put_device(ub);
|
||||
out:
|
||||
io_uring_cmd_done(cmd, ret, 0);
|
||||
pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n",
|
||||
@ -2105,5 +2301,8 @@ static void __exit ublk_exit(void)
|
||||
module_init(ublk_init);
|
||||
module_exit(ublk_exit);
|
||||
|
||||
module_param(ublks_max, int, 0444);
|
||||
MODULE_PARM_DESC(ublks_max, "max number of ublk devices allowed to add(default: 64)");
|
||||
|
||||
MODULE_AUTHOR("Ming Lei <ming.lei@redhat.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
@@ -170,9 +170,7 @@ static int virtblk_setup_discard_write_zeroes_erase(struct request *req, bool un

        WARN_ON_ONCE(n != segments);

        req->special_vec.bv_page = virt_to_page(range);
        req->special_vec.bv_offset = offset_in_page(range);
        req->special_vec.bv_len = sizeof(*range) * segments;
        bvec_set_virt(&req->special_vec, range, sizeof(*range) * segments);
        req->rq_flags |= RQF_SPECIAL_PAYLOAD;

        return 0;
@@ -703,9 +703,7 @@ static ssize_t writeback_store(struct device *dev,
        for (; nr_pages != 0; index++, nr_pages--) {
                struct bio_vec bvec;

                bvec.bv_page = page;
                bvec.bv_len = PAGE_SIZE;
                bvec.bv_offset = 0;
                bvec_set_page(&bvec, page, PAGE_SIZE, 0);

                spin_lock(&zram->wb_limit_lock);
                if (zram->wb_limit_enable && !zram->bd_wb_limit) {

@@ -1380,12 +1378,9 @@ out:
static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
                                    u32 index, struct bio *bio, bool partial_io)
{
        struct bio_vec bvec = {
                .bv_page = page,
                .bv_len = PAGE_SIZE,
                .bv_offset = 0,
        };
        struct bio_vec bvec;

        bvec_set_page(&bvec, page, PAGE_SIZE, 0);
        return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
                              partial_io);
}

@@ -1652,9 +1647,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
                memcpy_from_bvec(dst + offset, bvec);
                kunmap_atomic(dst);

                vec.bv_page = page;
                vec.bv_len = PAGE_SIZE;
                vec.bv_offset = 0;
                bvec_set_page(&vec, page, PAGE_SIZE, 0);
        }

        ret = __zram_bvec_write(zram, &vec, index, bio);
@@ -380,6 +380,10 @@ EXPORT_SYMBOL_GPL(md_new_event);
static LIST_HEAD(all_mddevs);
static DEFINE_SPINLOCK(all_mddevs_lock);

static bool is_md_suspended(struct mddev *mddev)
{
        return percpu_ref_is_dying(&mddev->active_io);
}
/* Rather than calling directly into the personality make_request function,
 * IO requests come here first so that we can check if the device is
 * being suspended pending a reconfiguration.

@@ -389,7 +393,7 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
 */
static bool is_suspended(struct mddev *mddev, struct bio *bio)
{
        if (mddev->suspended)
        if (is_md_suspended(mddev))
                return true;
        if (bio_data_dir(bio) != WRITE)
                return false;

@@ -405,12 +409,10 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio)
void md_handle_request(struct mddev *mddev, struct bio *bio)
{
check_suspended:
        rcu_read_lock();
        if (is_suspended(mddev, bio)) {
                DEFINE_WAIT(__wait);
                /* Bail out if REQ_NOWAIT is set for the bio */
                if (bio->bi_opf & REQ_NOWAIT) {
                        rcu_read_unlock();
                        bio_wouldblock_error(bio);
                        return;
                }

@@ -419,23 +421,19 @@ check_suspended:
                                        TASK_UNINTERRUPTIBLE);
                        if (!is_suspended(mddev, bio))
                                break;
                        rcu_read_unlock();
                        schedule();
                        rcu_read_lock();
                }
                finish_wait(&mddev->sb_wait, &__wait);
        }
        atomic_inc(&mddev->active_io);
        rcu_read_unlock();
        if (!percpu_ref_tryget_live(&mddev->active_io))
                goto check_suspended;

        if (!mddev->pers->make_request(mddev, bio)) {
                atomic_dec(&mddev->active_io);
                wake_up(&mddev->sb_wait);
                percpu_ref_put(&mddev->active_io);
                goto check_suspended;
        }

        if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
                wake_up(&mddev->sb_wait);
        percpu_ref_put(&mddev->active_io);
}
EXPORT_SYMBOL(md_handle_request);
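The md hunks above, together with the mddev_suspend()/mddev_resume() and md_run() changes that follow, convert the open-coded active_io counter plus wait-queue into a percpu_ref. As a hedged summary of that pattern rather than a copy of md.c, the lifecycle looks roughly like this (struct my_dev and the my_* helpers are invented names):

#include <linux/kernel.h>
#include <linux/percpu-refcount.h>
#include <linux/wait.h>
#include <linux/gfp.h>

struct my_dev {
        struct percpu_ref       active_io;
        wait_queue_head_t       wait;
};

static void my_active_io_release(struct percpu_ref *ref)
{
        struct my_dev *dev = container_of(ref, struct my_dev, active_io);

        wake_up(&dev->wait);    /* last in-flight request just dropped its ref */
}

static int my_init(struct my_dev *dev)
{
        init_waitqueue_head(&dev->wait);
        /* ALLOW_REINIT so the ref can be resurrected on resume */
        return percpu_ref_init(&dev->active_io, my_active_io_release,
                               PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
}

static bool my_start_io(struct my_dev *dev)
{
        /* fails once the ref was killed, i.e. the device is suspending */
        return percpu_ref_tryget_live(&dev->active_io);
}

static void my_end_io(struct my_dev *dev)
{
        percpu_ref_put(&dev->active_io);
}

static void my_suspend(struct my_dev *dev)
{
        percpu_ref_kill(&dev->active_io);
        wait_event(dev->wait, percpu_ref_is_zero(&dev->active_io));
}

static void my_resume(struct my_dev *dev)
{
        percpu_ref_resurrect(&dev->active_io);
}

static void my_teardown(struct my_dev *dev)
{
        percpu_ref_exit(&dev->active_io);
}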
@ -483,11 +481,10 @@ void mddev_suspend(struct mddev *mddev)
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
if (mddev->suspended++)
|
||||
return;
|
||||
synchronize_rcu();
|
||||
wake_up(&mddev->sb_wait);
|
||||
set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
|
||||
smp_mb__after_atomic();
|
||||
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
|
||||
percpu_ref_kill(&mddev->active_io);
|
||||
wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
|
||||
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
|
||||
@ -505,6 +502,7 @@ void mddev_resume(struct mddev *mddev)
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
if (--mddev->suspended)
|
||||
return;
|
||||
percpu_ref_resurrect(&mddev->active_io);
|
||||
wake_up(&mddev->sb_wait);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
|
||||
@ -683,7 +681,6 @@ void mddev_init(struct mddev *mddev)
|
||||
timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0);
|
||||
atomic_set(&mddev->active, 1);
|
||||
atomic_set(&mddev->openers, 0);
|
||||
atomic_set(&mddev->active_io, 0);
|
||||
spin_lock_init(&mddev->lock);
|
||||
atomic_set(&mddev->flush_pending, 0);
|
||||
init_waitqueue_head(&mddev->sb_wait);
|
||||
@ -5760,6 +5757,12 @@ static void md_safemode_timeout(struct timer_list *t)
|
||||
}
|
||||
|
||||
static int start_dirty_degraded;
|
||||
static void active_io_release(struct percpu_ref *ref)
|
||||
{
|
||||
struct mddev *mddev = container_of(ref, struct mddev, active_io);
|
||||
|
||||
wake_up(&mddev->sb_wait);
|
||||
}
|
||||
|
||||
int md_run(struct mddev *mddev)
|
||||
{
|
||||
@ -5840,10 +5843,15 @@ int md_run(struct mddev *mddev)
|
||||
nowait = nowait && bdev_nowait(rdev->bdev);
|
||||
}
|
||||
|
||||
err = percpu_ref_init(&mddev->active_io, active_io_release,
|
||||
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (!bioset_initialized(&mddev->bio_set)) {
|
||||
err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
if (err)
|
||||
return err;
|
||||
goto exit_active_io;
|
||||
}
|
||||
if (!bioset_initialized(&mddev->sync_set)) {
|
||||
err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
|
||||
@ -6031,6 +6039,8 @@ abort:
|
||||
bioset_exit(&mddev->sync_set);
|
||||
exit_bio_set:
|
||||
bioset_exit(&mddev->bio_set);
|
||||
exit_active_io:
|
||||
percpu_ref_exit(&mddev->active_io);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(md_run);
|
||||
@ -6156,7 +6166,7 @@ static void md_clean(struct mddev *mddev)
|
||||
mddev->new_level = LEVEL_NONE;
|
||||
mddev->new_layout = 0;
|
||||
mddev->new_chunk_sectors = 0;
|
||||
mddev->curr_resync = 0;
|
||||
mddev->curr_resync = MD_RESYNC_NONE;
|
||||
atomic64_set(&mddev->resync_mismatches, 0);
|
||||
mddev->suspend_lo = mddev->suspend_hi = 0;
|
||||
mddev->sync_speed_min = mddev->sync_speed_max = 0;
|
||||
@ -6219,7 +6229,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
|
||||
static void mddev_detach(struct mddev *mddev)
|
||||
{
|
||||
md_bitmap_wait_behind_writes(mddev);
|
||||
if (mddev->pers && mddev->pers->quiesce && !mddev->suspended) {
|
||||
if (mddev->pers && mddev->pers->quiesce && !is_md_suspended(mddev)) {
|
||||
mddev->pers->quiesce(mddev, 1);
|
||||
mddev->pers->quiesce(mddev, 0);
|
||||
}
|
||||
@ -6255,6 +6265,8 @@ void md_stop(struct mddev *mddev)
|
||||
*/
|
||||
__md_stop_writes(mddev);
|
||||
__md_stop(mddev);
|
||||
percpu_ref_exit(&mddev->writes_pending);
|
||||
percpu_ref_exit(&mddev->active_io);
|
||||
bioset_exit(&mddev->bio_set);
|
||||
bioset_exit(&mddev->sync_set);
|
||||
}
|
||||
@ -7828,6 +7840,7 @@ static void md_free_disk(struct gendisk *disk)
|
||||
struct mddev *mddev = disk->private_data;
|
||||
|
||||
percpu_ref_exit(&mddev->writes_pending);
|
||||
percpu_ref_exit(&mddev->active_io);
|
||||
bioset_exit(&mddev->bio_set);
|
||||
bioset_exit(&mddev->sync_set);
|
||||
|
||||
@ -8531,7 +8544,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
|
||||
return true;
|
||||
wait_event(mddev->sb_wait,
|
||||
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
|
||||
mddev->suspended);
|
||||
is_md_suspended(mddev));
|
||||
if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
|
||||
percpu_ref_put(&mddev->writes_pending);
|
||||
return false;
|
||||
@ -8615,12 +8628,15 @@ static void md_end_io_acct(struct bio *bio)
|
||||
{
|
||||
struct md_io_acct *md_io_acct = bio->bi_private;
|
||||
struct bio *orig_bio = md_io_acct->orig_bio;
|
||||
struct mddev *mddev = md_io_acct->mddev;
|
||||
|
||||
orig_bio->bi_status = bio->bi_status;
|
||||
|
||||
bio_end_io_acct(orig_bio, md_io_acct->start_time);
|
||||
bio_put(bio);
|
||||
bio_endio(orig_bio);
|
||||
|
||||
percpu_ref_put(&mddev->active_io);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -8636,10 +8652,13 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
|
||||
if (!blk_queue_io_stat(bdev->bd_disk->queue))
|
||||
return;
|
||||
|
||||
percpu_ref_get(&mddev->active_io);
|
||||
|
||||
clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
|
||||
md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
|
||||
md_io_acct->orig_bio = *bio;
|
||||
md_io_acct->start_time = bio_start_io_acct(*bio);
|
||||
md_io_acct->mddev = mddev;
|
||||
|
||||
clone->bi_end_io = md_end_io_acct;
|
||||
clone->bi_private = md_io_acct;
|
||||
@ -8883,7 +8902,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
atomic_set(&mddev->recovery_active, 0);
|
||||
last_check = 0;
|
||||
|
||||
if (j>2) {
|
||||
if (j >= MD_RESYNC_ACTIVE) {
|
||||
pr_debug("md: resuming %s of %s from checkpoint.\n",
|
||||
desc, mdname(mddev));
|
||||
mddev->curr_resync = j;
|
||||
@ -8955,7 +8974,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
if (j > max_sectors)
|
||||
/* when skipping, extra large numbers can be returned. */
|
||||
j = max_sectors;
|
||||
if (j > 2)
|
||||
if (j >= MD_RESYNC_ACTIVE)
|
||||
mddev->curr_resync = j;
|
||||
mddev->curr_mark_cnt = io_sectors;
|
||||
if (last_check == 0)
|
||||
@ -9030,7 +9049,7 @@ void md_do_sync(struct md_thread *thread)
|
||||
mddev->pers->sync_request(mddev, max_sectors, &skipped);
|
||||
|
||||
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
|
||||
mddev->curr_resync >= MD_RESYNC_ACTIVE) {
|
||||
mddev->curr_resync > MD_RESYNC_ACTIVE) {
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
if (mddev->curr_resync >= mddev->recovery_cp) {
|
||||
@ -9259,7 +9278,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||
wake_up(&mddev->sb_wait);
|
||||
}
|
||||
|
||||
if (mddev->suspended)
|
||||
if (is_md_suspended(mddev))
|
||||
return;
|
||||
|
||||
if (mddev->bitmap)
|
||||
|
@ -315,7 +315,7 @@ struct mddev {
|
||||
unsigned long sb_flags;
|
||||
|
||||
int suspended;
|
||||
atomic_t active_io;
|
||||
struct percpu_ref active_io;
|
||||
int ro;
|
||||
int sysfs_active; /* set when sysfs deletes
|
||||
* are happening, so run/
|
||||
@ -710,9 +710,10 @@ struct md_thread {
|
||||
};
|
||||
|
||||
struct md_io_acct {
|
||||
struct bio *orig_bio;
|
||||
unsigned long start_time;
|
||||
struct bio bio_clone;
|
||||
struct mddev *mddev;
|
||||
struct bio *orig_bio;
|
||||
unsigned long start_time;
|
||||
struct bio bio_clone;
|
||||
};
|
||||
|
||||
#define THREAD_WAKEUP 0
|
||||
|
@ -160,7 +160,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
|
||||
|
||||
if (size > CHAP_BUF_SIZE) {
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
|
||||
return NVME_SC_INVALID_FIELD;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hmac_name = nvme_auth_hmac_name(data->hashid);
|
||||
@ -169,7 +169,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
|
||||
"qid %d: invalid HASH ID %d\n",
|
||||
chap->qid, data->hashid);
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
|
||||
return NVME_SC_INVALID_FIELD;
|
||||
return -EPROTO;
|
||||
}
|
||||
|
||||
if (chap->hash_id == data->hashid && chap->shash_tfm &&
|
||||
@ -195,7 +195,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
|
||||
chap->qid, hmac_name, PTR_ERR(chap->shash_tfm));
|
||||
chap->shash_tfm = NULL;
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
return NVME_SC_AUTH_REQUIRED;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (crypto_shash_digestsize(chap->shash_tfm) != data->hl) {
|
||||
@ -205,7 +205,7 @@ static int nvme_auth_process_dhchap_challenge(struct nvme_ctrl *ctrl,
|
||||
crypto_free_shash(chap->shash_tfm);
|
||||
chap->shash_tfm = NULL;
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
|
||||
return NVME_SC_AUTH_REQUIRED;
|
||||
return -EPROTO;
|
||||
}
|
||||
|
||||
chap->hash_id = data->hashid;
|
||||
@ -221,7 +221,7 @@ select_kpp:
|
||||
chap->qid, data->dhgid);
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
|
||||
/* Leave previous dh_tfm intact */
|
||||
return NVME_SC_AUTH_REQUIRED;
|
||||
return -EPROTO;
|
||||
}
|
||||
|
||||
if (chap->dhgroup_id == data->dhgid &&
|
||||
@ -244,7 +244,7 @@ select_kpp:
|
||||
"qid %d: empty DH value\n",
|
||||
chap->qid);
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
|
||||
return NVME_SC_INVALID_FIELD;
|
||||
return -EPROTO;
|
||||
}
|
||||
|
||||
chap->dh_tfm = crypto_alloc_kpp(kpp_name, 0, 0);
|
||||
@ -256,7 +256,7 @@ select_kpp:
|
||||
chap->qid, ret, gid_name);
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_DHGROUP_UNUSABLE;
|
||||
chap->dh_tfm = NULL;
|
||||
return NVME_SC_AUTH_REQUIRED;
|
||||
return -ret;
|
||||
}
|
||||
dev_dbg(ctrl->device, "qid %d: selected DH group %s\n",
|
||||
chap->qid, gid_name);
|
||||
@ -265,7 +265,7 @@ select_kpp:
|
||||
"qid %d: invalid DH value for NULL DH\n",
|
||||
chap->qid);
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
|
||||
return NVME_SC_INVALID_FIELD;
|
||||
return -EPROTO;
|
||||
}
|
||||
chap->dhgroup_id = data->dhgid;
|
||||
|
||||
@ -276,7 +276,7 @@ skip_kpp:
|
||||
chap->ctrl_key = kmalloc(dhvlen, GFP_KERNEL);
|
||||
if (!chap->ctrl_key) {
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
return NVME_SC_AUTH_REQUIRED;
|
||||
return -ENOMEM;
|
||||
}
|
||||
chap->ctrl_key_len = dhvlen;
|
||||
memcpy(chap->ctrl_key, data->cval + chap->hash_len,
|
||||
@ -346,7 +346,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl,
|
||||
|
||||
if (size > CHAP_BUF_SIZE) {
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_INCORRECT_PAYLOAD;
|
||||
return NVME_SC_INVALID_FIELD;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (data->hl != chap->hash_len) {
|
||||
@ -354,7 +354,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl,
|
||||
"qid %d: invalid hash length %u\n",
|
||||
chap->qid, data->hl);
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE;
|
||||
return NVME_SC_INVALID_FIELD;
|
||||
return -EPROTO;
|
||||
}
|
||||
|
||||
/* Just print out information for the admin queue */
|
||||
@ -378,7 +378,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl,
|
||||
"qid %d: controller authentication failed\n",
|
||||
chap->qid);
|
||||
chap->status = NVME_AUTH_DHCHAP_FAILURE_FAILED;
|
||||
return NVME_SC_AUTH_REQUIRED;
|
||||
return -ECONNREFUSED;
|
||||
}
|
||||
|
||||
/* Just print out information for the admin queue */
|
||||
@ -732,7 +732,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
|
||||
NVME_AUTH_DHCHAP_MESSAGE_CHALLENGE);
|
||||
if (ret) {
|
||||
chap->status = ret;
|
||||
chap->error = NVME_SC_AUTH_REQUIRED;
|
||||
chap->error = -ECONNREFUSED;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -800,7 +800,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
|
||||
NVME_AUTH_DHCHAP_MESSAGE_SUCCESS1);
|
||||
if (ret) {
|
||||
chap->status = ret;
|
||||
chap->error = NVME_SC_AUTH_REQUIRED;
|
||||
chap->error = -ECONNREFUSED;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -821,7 +821,7 @@ static void nvme_queue_auth_work(struct work_struct *work)
|
||||
ret = nvme_auth_process_dhchap_success1(ctrl, chap);
|
||||
if (ret) {
|
||||
/* Controller authentication failed */
|
||||
chap->error = NVME_SC_AUTH_REQUIRED;
|
||||
chap->error = -ECONNREFUSED;
|
||||
goto fail2;
|
||||
}
|
||||
|
||||
|
@ -54,6 +54,14 @@ static const char * const nvme_admin_ops[] = {
|
||||
[nvme_admin_get_lba_status] = "Get LBA Status",
|
||||
};
|
||||
|
||||
static const char * const nvme_fabrics_ops[] = {
|
||||
[nvme_fabrics_type_property_set] = "Property Set",
|
||||
[nvme_fabrics_type_property_get] = "Property Get",
|
||||
[nvme_fabrics_type_connect] = "Connect",
|
||||
[nvme_fabrics_type_auth_send] = "Authentication Send",
|
||||
[nvme_fabrics_type_auth_receive] = "Authentication Receive",
|
||||
};
|
||||
|
||||
static const char * const nvme_statuses[] = {
|
||||
[NVME_SC_SUCCESS] = "Success",
|
||||
[NVME_SC_INVALID_OPCODE] = "Invalid Command Opcode",
|
||||
@ -185,3 +193,11 @@ const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
|
||||
return nvme_admin_ops[opcode];
|
||||
return "Unknown";
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_get_admin_opcode_str);
|
||||
|
||||
const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode) {
|
||||
if (opcode < ARRAY_SIZE(nvme_fabrics_ops) && nvme_fabrics_ops[opcode])
|
||||
return nvme_fabrics_ops[opcode];
|
||||
return "Unknown";
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_get_fabrics_opcode_str);
|
||||
|
@ -806,9 +806,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
|
||||
cmnd->dsm.nr = cpu_to_le32(segments - 1);
|
||||
cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
|
||||
|
||||
req->special_vec.bv_page = virt_to_page(range);
|
||||
req->special_vec.bv_offset = offset_in_page(range);
|
||||
req->special_vec.bv_len = alloc_size;
|
||||
bvec_set_virt(&req->special_vec, range, alloc_size);
|
||||
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
|
||||
|
||||
return BLK_STS_OK;
|
||||
@ -1004,7 +1002,7 @@ EXPORT_SYMBOL_GPL(nvme_setup_cmd);
|
||||
* >0: nvme controller's cqe status response
|
||||
* <0: kernel error in lieu of controller response
|
||||
*/
|
||||
static int nvme_execute_rq(struct request *rq, bool at_head)
|
||||
int nvme_execute_rq(struct request *rq, bool at_head)
|
||||
{
|
||||
blk_status_t status;
|
||||
|
||||
@ -1015,6 +1013,7 @@ static int nvme_execute_rq(struct request *rq, bool at_head)
|
||||
return nvme_req(rq)->status;
|
||||
return blk_status_to_errno(status);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, NVME_TARGET_PASSTHRU);
|
||||
|
||||
/*
|
||||
* Returns 0 on success. If the result is negative, it's a Linux error code;
|
||||
@ -1060,41 +1059,12 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd);
|
||||
|
||||
static u32 nvme_known_admin_effects(u8 opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case nvme_admin_format_nvm:
|
||||
return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_NCC |
|
||||
NVME_CMD_EFFECTS_CSE_MASK;
|
||||
case nvme_admin_sanitize_nvm:
|
||||
return NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static u32 nvme_known_nvm_effects(u8 opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case nvme_cmd_write:
|
||||
case nvme_cmd_write_zeroes:
|
||||
case nvme_cmd_write_uncor:
|
||||
return NVME_CMD_EFFECTS_LBCC;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
|
||||
{
|
||||
u32 effects = 0;
|
||||
|
||||
if (ns) {
|
||||
if (ns->head->effects)
|
||||
effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
|
||||
if (ns->head->ids.csi == NVME_CSI_NVM)
|
||||
effects |= nvme_known_nvm_effects(opcode);
|
||||
effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
|
||||
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
|
||||
dev_warn_once(ctrl->device,
|
||||
"IO command:%02x has unusual effects:%08x\n",
|
||||
@ -1107,17 +1077,14 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
|
||||
*/
|
||||
effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
|
||||
} else {
|
||||
if (ctrl->effects)
|
||||
effects = le32_to_cpu(ctrl->effects->acs[opcode]);
|
||||
effects |= nvme_known_admin_effects(opcode);
|
||||
effects = le32_to_cpu(ctrl->effects->acs[opcode]);
|
||||
}
|
||||
|
||||
return effects;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU);
|
||||
|
||||
static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
u8 opcode)
|
||||
u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
|
||||
{
|
||||
u32 effects = nvme_command_effects(ctrl, ns, opcode);
|
||||
|
||||
@ -1135,6 +1102,7 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
}
|
||||
return effects;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, NVME_TARGET_PASSTHRU);
|
||||
|
||||
void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
|
||||
struct nvme_command *cmd, int status)
|
||||
@ -1176,17 +1144,6 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU);
|
||||
|
||||
int nvme_execute_passthru_rq(struct request *rq, u32 *effects)
|
||||
{
|
||||
struct nvme_command *cmd = nvme_req(rq)->cmd;
|
||||
struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
|
||||
struct nvme_ns *ns = rq->q->queuedata;
|
||||
|
||||
*effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
|
||||
return nvme_execute_rq(rq, false);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU);
|
||||
|
||||
/*
|
||||
* Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1:
|
||||
*
|
||||
@ -3122,6 +3079,62 @@ free_data:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void nvme_init_known_nvm_effects(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct nvme_effects_log *log = ctrl->effects;
|
||||
|
||||
log->acs[nvme_admin_format_nvm] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC |
|
||||
NVME_CMD_EFFECTS_NCC |
|
||||
NVME_CMD_EFFECTS_CSE_MASK);
|
||||
log->acs[nvme_admin_sanitize_nvm] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC |
|
||||
NVME_CMD_EFFECTS_CSE_MASK);
|
||||
|
||||
/*
|
||||
* The spec says the result of a security receive command depends on
|
||||
* the previous security send command. As such, many vendors log this
|
||||
* command as one to submitted only when no other commands to the same
|
||||
* namespace are outstanding. The intention is to tell the host to
|
||||
* prevent mixing security send and receive.
|
||||
*
|
||||
* This driver can only enforce such exclusive access against IO
|
||||
* queues, though. We are not readily able to enforce such a rule for
|
||||
* two commands to the admin queue, which is the only queue that
|
||||
* matters for this command.
|
||||
*
|
||||
* Rather than blindly freezing the IO queues for this effect that
|
||||
* doesn't even apply to IO, mask it off.
|
||||
*/
|
||||
log->acs[nvme_admin_security_recv] &= ~NVME_CMD_EFFECTS_CSE_MASK;
|
||||
|
||||
log->iocs[nvme_cmd_write] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
|
||||
log->iocs[nvme_cmd_write_zeroes] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
|
||||
log->iocs[nvme_cmd_write_uncor] |= cpu_to_le32(NVME_CMD_EFFECTS_LBCC);
|
||||
}
|
||||
|
||||
static int nvme_init_effects(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (ctrl->effects)
|
||||
return 0;
|
||||
|
||||
if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
|
||||
ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!ctrl->effects) {
|
||||
ctrl->effects = kzalloc(sizeof(*ctrl->effects), GFP_KERNEL);
|
||||
if (!ctrl->effects)
|
||||
return -ENOMEM;
|
||||
xa_store(&ctrl->cels, NVME_CSI_NVM, ctrl->effects, GFP_KERNEL);
|
||||
}
|
||||
|
||||
nvme_init_known_nvm_effects(ctrl);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_init_identify(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
struct nvme_id_ctrl *id;
|
||||
@ -3135,12 +3148,6 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (id->lpa & NVME_CTRL_LPA_CMD_EFFECTS_LOG) {
|
||||
ret = nvme_get_effects_log(ctrl, NVME_CSI_NVM, &ctrl->effects);
|
||||
if (ret < 0)
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (!(ctrl->ops->flags & NVME_F_FABRICS))
|
||||
ctrl->cntlid = le16_to_cpu(id->cntlid);
|
||||
|
||||
@ -3163,6 +3170,10 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
|
||||
ret = nvme_init_subsystem(ctrl, id);
|
||||
if (ret)
|
||||
goto out_free;
|
||||
|
||||
ret = nvme_init_effects(ctrl, id);
|
||||
if (ret)
|
||||
goto out_free;
|
||||
}
|
||||
memcpy(ctrl->subsys->firmware_rev, id->fr,
|
||||
sizeof(ctrl->subsys->firmware_rev));
|
||||
|
@ -410,7 +410,14 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
|
||||
|
||||
result = le32_to_cpu(res.u32);
|
||||
ctrl->cntlid = result & 0xFFFF;
|
||||
if ((result >> 16) & 0x3) {
|
||||
if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
|
||||
/* Secure concatenation is not implemented */
|
||||
if (result & NVME_CONNECT_AUTHREQ_ASCR) {
|
||||
dev_warn(ctrl->device,
|
||||
"qid 0: secure concatenation is not supported\n");
|
||||
ret = NVME_SC_AUTH_REQUIRED;
|
||||
goto out_free_data;
|
||||
}
|
||||
/* Authentication required */
|
||||
ret = nvme_auth_negotiate(ctrl, 0);
|
||||
if (ret) {
|
||||
@ -486,7 +493,14 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
|
||||
&cmd, data);
|
||||
}
|
||||
result = le32_to_cpu(res.u32);
|
||||
if ((result >> 16) & 2) {
|
||||
if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) {
|
||||
/* Secure concatenation is not implemented */
|
||||
if (result & NVME_CONNECT_AUTHREQ_ASCR) {
|
||||
dev_warn(ctrl->device,
|
||||
"qid 0: secure concatenation is not supported\n");
|
||||
ret = NVME_SC_AUTH_REQUIRED;
|
||||
goto out_free_data;
|
||||
}
|
||||
/* Authentication required */
|
||||
ret = nvme_auth_negotiate(ctrl, qid);
|
||||
if (ret) {
|
||||
@ -500,6 +514,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
|
||||
"qid %u: authentication failed\n", qid);
|
||||
}
|
||||
}
|
||||
out_free_data:
|
||||
kfree(data);
|
||||
return ret;
|
||||
}
|
||||
|
@@ -219,6 +219,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
u64 *result, unsigned timeout, unsigned int flags)
{
struct nvme_ns *ns = q->queuedata;
struct nvme_ctrl *ctrl;
struct request *req;
void *meta = NULL;

@@ -241,8 +242,8 @@ static int nvme_submit_user_cmd(struct request_queue *q,
bio = req->bio;
ctrl = nvme_req(req)->ctrl;

ret = nvme_execute_passthru_rq(req, &effects);

effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
ret = nvme_execute_rq(req, false);
if (result)
*result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta)

@@ -554,7 +555,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_uring_data d;
struct nvme_command c;
struct request *req;
blk_opf_t rq_flags = 0;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
void *meta = NULL;
int ret;

@@ -590,7 +591,7 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
d.timeout_ms = READ_ONCE(cmd->timeout_ms);

if (issue_flags & IO_URING_F_NONBLOCK) {
rq_flags = REQ_NOWAIT;
rq_flags |= REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
}
if (issue_flags & IO_URING_F_IOPOLL)
@@ -1070,7 +1070,8 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {};

u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 opcode);
int nvme_execute_passthru_rq(struct request *rq, u32 *effects);
u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode);
int nvme_execute_rq(struct request *rq, bool at_head);
void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
struct nvme_command *cmd, int status);
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);

@@ -1086,6 +1087,7 @@ static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
const unsigned char *nvme_get_error_status_str(u16 status);
const unsigned char *nvme_get_opcode_str(u8 opcode);
const unsigned char *nvme_get_admin_opcode_str(u8 opcode);
const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode);
#else /* CONFIG_NVME_VERBOSE_ERRORS */
static inline const unsigned char *nvme_get_error_status_str(u16 status)
{

@@ -1099,6 +1101,18 @@ static inline const unsigned char *nvme_get_admin_opcode_str(u8 opcode)
{
return "Admin Cmd";
}

static inline const unsigned char *nvme_get_fabrics_opcode_str(u8 opcode)
{
return "Fabrics Cmd";
}
#endif /* CONFIG_NVME_VERBOSE_ERRORS */

static inline const unsigned char *nvme_opcode_str(int qid, u8 opcode, u8 fctype)
{
if (opcode == nvme_fabrics_command)
return nvme_get_fabrics_opcode_str(fctype);
return qid ? nvme_get_opcode_str(opcode) :
nvme_get_admin_opcode_str(opcode);
}
#endif /* _NVME_H */
@@ -42,8 +42,9 @@
* These can be higher, but we need to ensure that any command doesn't
* require an sg allocation that needs more than a page of data.
*/
#define NVME_MAX_KB_SZ 4096
#define NVME_MAX_SEGS 127
#define NVME_MAX_KB_SZ 8192
#define NVME_MAX_SEGS 128
#define NVME_MAX_NR_ALLOCATIONS 5

static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0444);

@@ -216,6 +217,11 @@ struct nvme_queue {
struct completion delete_done;
};

union nvme_descriptor {
struct nvme_sgl_desc *sg_list;
__le64 *prp_list;
};

/*
* The nvme_iod describes the data in an I/O.
*

@@ -225,7 +231,6 @@ struct nvme_queue {
struct nvme_iod {
struct nvme_request req;
struct nvme_command cmd;
bool use_sgl;
bool aborted;
s8 nr_allocations; /* PRP list pool allocations. 0 means small
pool in use */

@@ -233,6 +238,7 @@ struct nvme_iod {
dma_addr_t first_dma;
dma_addr_t meta_dma;
struct sg_table sgt;
union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
};

static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
@@ -387,16 +393,6 @@ static int nvme_pci_npages_prp(void)
return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
}

/*
* Calculates the number of pages needed for the SGL segments. For example a 4k
* page can accommodate 256 SGL descriptors.
*/
static int nvme_pci_npages_sgl(void)
{
return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
NVME_CTRL_PAGE_SIZE);
}

static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{

@@ -510,16 +506,10 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
spin_unlock(&nvmeq->sq_lock);
}

static void **nvme_pci_iod_list(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
return (void **)(iod->sgt.sgl + blk_rq_nr_phys_segments(req));
}

static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
int nseg)
{
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
int nseg = blk_rq_nr_phys_segments(req);
unsigned int avg_seg_size;

avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);
@@ -541,7 +531,7 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
int i;

for (i = 0; i < iod->nr_allocations; i++) {
__le64 *prp_list = nvme_pci_iod_list(req)[i];
__le64 *prp_list = iod->list[i].prp_list;
dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);

dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);

@@ -549,22 +539,6 @@ static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
}
}

static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
{
const int last_sg = SGES_PER_PAGE - 1;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_addr_t dma_addr = iod->first_dma;
int i;

for (i = 0; i < iod->nr_allocations; i++) {
struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);

dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
dma_addr = next_dma_addr;
}
}

static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

@@ -580,10 +554,11 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
dma_unmap_sgtable(dev->dev, &iod->sgt, rq_dma_dir(req), 0);

if (iod->nr_allocations == 0)
dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
dma_pool_free(dev->prp_small_pool, iod->list[0].sg_list,
iod->first_dma);
else if (iod->nr_allocations == 1)
dma_pool_free(dev->prp_page_pool, iod->list[0].sg_list,
iod->first_dma);
else if (iod->use_sgl)
nvme_free_sgls(dev, req);
else
nvme_free_prps(dev, req);
mempool_free(iod->sgt.sgl, dev->iod_mempool);
@@ -614,7 +589,6 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
u64 dma_addr = sg_dma_address(sg);
int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
__le64 *prp_list;
void **list = nvme_pci_iod_list(req);
dma_addr_t prp_dma;
int nprps, i;

@@ -652,7 +626,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
iod->nr_allocations = -1;
return BLK_STS_RESOURCE;
}
list[0] = prp_list;
iod->list[0].prp_list = prp_list;
iod->first_dma = prp_dma;
i = 0;
for (;;) {

@@ -661,7 +635,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
goto free_prps;
list[iod->nr_allocations++] = prp_list;
iod->list[iod->nr_allocations++].prp_list = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
i = 1;
@@ -706,13 +680,8 @@ static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
dma_addr_t dma_addr, int entries)
{
sge->addr = cpu_to_le64(dma_addr);
if (entries < SGES_PER_PAGE) {
sge->length = cpu_to_le32(entries * sizeof(*sge));
sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
} else {
sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE);
sge->type = NVME_SGL_FMT_SEG_DESC << 4;
}
sge->length = cpu_to_le32(entries * sizeof(*sge));
sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
}

static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,

@@ -748,34 +717,16 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
return BLK_STS_RESOURCE;
}

nvme_pci_iod_list(req)[0] = sg_list;
iod->list[0].sg_list = sg_list;
iod->first_dma = sgl_dma;

nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);

do {
if (i == SGES_PER_PAGE) {
struct nvme_sgl_desc *old_sg_desc = sg_list;
struct nvme_sgl_desc *link = &old_sg_desc[i - 1];

sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
if (!sg_list)
goto free_sgls;

i = 0;
nvme_pci_iod_list(req)[iod->nr_allocations++] = sg_list;
sg_list[i++] = *link;
nvme_pci_sgl_set_seg(link, sgl_dma, entries);
}

nvme_pci_sgl_set_data(&sg_list[i++], sg);
sg = sg_next(sg);
} while (--entries > 0);

return BLK_STS_OK;
free_sgls:
nvme_free_sgls(dev, req);
return BLK_STS_RESOURCE;
}

static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
@@ -857,8 +808,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out_free_sg;
}

iod->use_sgl = nvme_pci_use_sgls(dev, req);
if (iod->use_sgl)
if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);

@@ -2706,11 +2656,8 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)

static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
size_t alloc_size = sizeof(__le64 *) * npages +
sizeof(struct scatterlist) * NVME_MAX_SEGS;
size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS;

WARN_ON_ONCE(alloc_size > PAGE_SIZE);
dev->iod_mempool = mempool_create_node(1,
mempool_kmalloc, mempool_kfree,
(void *)alloc_size, GFP_KERNEL,

@@ -3538,8 +3485,9 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
BUILD_BUG_ON(DIV_ROUND_UP(nvme_pci_npages_prp(), NVME_CTRL_PAGE_SIZE) >
S8_MAX);
BUILD_BUG_ON(NVME_MAX_SEGS > SGES_PER_PAGE);
BUILD_BUG_ON(sizeof(struct scatterlist) * NVME_MAX_SEGS > PAGE_SIZE);
BUILD_BUG_ON(nvme_pci_npages_prp() > NVME_MAX_NR_ALLOCATIONS);

return pci_register_driver(&nvme_driver);
}
@@ -2282,10 +2282,13 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype;
int qid = nvme_tcp_queue_id(req->queue);

dev_warn(ctrl->device,
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
"queue %d: timeout cid %#x type %d opcode %#x (%s)\n",
nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
opc, nvme_opcode_str(qid, opc, fctype));

if (ctrl->state != NVME_CTRL_LIVE) {
/*
@@ -840,7 +840,7 @@ void nvmet_execute_set_features(struct nvmet_req *req)
u16 nsqr;
u16 ncqr;

if (!nvmet_check_transfer_len(req, 0))
if (!nvmet_check_data_len_lte(req, 0))
return;

switch (cdw10 & 0xff) {
@@ -73,13 +73,6 @@ err:
return ret;
}

static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
{
bv->bv_page = sg_page(sg);
bv->bv_offset = sg->offset;
bv->bv_len = sg->length;
}

static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
unsigned long nr_segs, size_t count, int ki_flags)
{

@@ -146,7 +139,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)

memset(&req->f.iocb, 0, sizeof(struct kiocb));
for_each_sg(req->sg, sg, req->sg_cnt, i) {
nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
bvec_set_page(&req->f.bvec[bv_cnt], sg_page(sg), sg->length,
sg->offset);
len += req->f.bvec[bv_cnt].bv_len;
total_len += req->f.bvec[bv_cnt].bv_len;
bv_cnt++;
@@ -216,11 +216,12 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
struct nvmet_req *req = container_of(w, struct nvmet_req, p.work);
struct request *rq = req->p.rq;
struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
struct nvme_ns *ns = rq->q->queuedata;
u32 effects;
int status;

status = nvme_execute_passthru_rq(rq, &effects);

effects = nvme_passthru_start(ctrl, ns, req->cmd->common.opcode);
status = nvme_execute_rq(rq, false);
if (status == NVME_SC_SUCCESS &&
req->cmd->common.opcode == nvme_admin_identify) {
switch (req->cmd->identify.cns) {
@@ -321,9 +321,8 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
while (length) {
u32 iov_len = min_t(u32, length, sg->length - sg_offset);

iov->bv_page = sg_page(sg);
iov->bv_len = sg->length;
iov->bv_offset = sg->offset + sg_offset;
bvec_set_page(iov, sg_page(sg), sg->length,
sg->offset + sg_offset);

length -= iov_len;
sg = sg_next(sg);
@@ -254,8 +254,7 @@ static unsigned long nvmet_req_nr_zones_from_slba(struct nvmet_req *req)
{
unsigned int sect = nvmet_lba_to_sect(req->ns, req->cmd->zmr.slba);

return bdev_nr_zones(req->ns->bdev) -
(sect >> ilog2(bdev_zone_sectors(req->ns->bdev)));
return bdev_nr_zones(req->ns->bdev) - bdev_zone_no(req->ns->bdev, sect);
}

static unsigned long get_nr_zones_from_buf(struct nvmet_req *req, u32 bufsize)
@@ -3978,7 +3978,7 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,

ccw = cqr->cpaddr;
ccw->cmd_code = CCW_CMD_RDC;
ccw->cda = (__u32)(addr_t) cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);
ccw->flags = 0;
ccw->count = rdc_buffer_size;
cqr->startdev = device;

@@ -4022,8 +4022,7 @@ char *dasd_get_sense(struct irb *irb)

if (scsw_is_tm(&irb->scsw) && (irb->scsw.tm.fcxs == 0x01)) {
if (irb->scsw.tm.tcw)
tsb = tcw_get_tsb((struct tcw *)(unsigned long)
irb->scsw.tm.tcw);
tsb = tcw_get_tsb(phys_to_virt(irb->scsw.tm.tcw));
if (tsb && tsb->length == 64 && tsb->flags)
switch (tsb->flags & 0x07) {
case 1: /* tsa_iostat */
@@ -220,7 +220,7 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier)
memset(ccw, 0, sizeof(struct ccw1));
ccw->cmd_code = CCW_CMD_DCTL;
ccw->count = 4;
ccw->cda = (__u32)(addr_t) DCTL_data;
ccw->cda = (__u32)virt_to_phys(DCTL_data);
dctl_cqr->flags = erp->flags;
dctl_cqr->function = dasd_3990_erp_DCTL;
dctl_cqr->refers = erp;

@@ -1714,7 +1714,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
ccw->cmd_code = DASD_ECKD_CCW_DEFINE_EXTENT;
ccw->flags = CCW_FLAG_CC;
ccw->count = 16;
ccw->cda = (__u32)(addr_t) DE_data;
ccw->cda = (__u32)virt_to_phys(DE_data);

/* create LO ccw */
ccw++;

@@ -1722,7 +1722,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
ccw->cmd_code = DASD_ECKD_CCW_LOCATE_RECORD;
ccw->flags = CCW_FLAG_CC;
ccw->count = 16;
ccw->cda = (__u32)(addr_t) LO_data;
ccw->cda = (__u32)virt_to_phys(LO_data);

/* TIC to the failed ccw */
ccw++;

@@ -2419,7 +2419,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr)
tcw = erp->cpaddr;
tsb = (struct tsb *) &tcw[1];
*tcw = *((struct tcw *)cqr->cpaddr);
tcw->tsb = (long)tsb;
tcw->tsb = virt_to_phys(tsb);
} else if (ccw->cmd_code == DASD_ECKD_CCW_PSF) {
/* PSF cannot be chained from NOOP/TIC */
erp->cpaddr = cqr->cpaddr;

@@ -2430,7 +2430,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr)
ccw->flags = CCW_FLAG_CC;
ccw++;
ccw->cmd_code = CCW_CMD_TIC;
ccw->cda = (long)(cqr->cpaddr);
ccw->cda = (__u32)virt_to_phys(cqr->cpaddr);
}

erp->flags = cqr->flags;
@@ -443,7 +443,7 @@ static int read_unit_address_configuration(struct dasd_device *device,
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->count = sizeof(struct dasd_psf_prssd_data);
ccw->flags |= CCW_FLAG_CC;
ccw->cda = (__u32)(addr_t) prssdp;
ccw->cda = (__u32)virt_to_phys(prssdp);

/* Read Subsystem Data - feature codes */
memset(lcu->uac, 0, sizeof(*(lcu->uac)));

@@ -451,7 +451,7 @@ static int read_unit_address_configuration(struct dasd_device *device,
ccw++;
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = sizeof(*(lcu->uac));
ccw->cda = (__u32)(addr_t) lcu->uac;
ccw->cda = (__u32)virt_to_phys(lcu->uac);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;

@@ -747,7 +747,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu,
ccw->cmd_code = DASD_ECKD_CCW_RSCK;
ccw->flags = CCW_FLAG_SLI;
ccw->count = 16;
ccw->cda = (__u32)(addr_t) cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);
((char *)cqr->data)[0] = reason;

clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
@@ -288,7 +288,7 @@ define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk,
ccw->cmd_code = DASD_ECKD_CCW_DEFINE_EXTENT;
ccw->flags = 0;
ccw->count = 16;
ccw->cda = (__u32)__pa(data);
ccw->cda = (__u32)virt_to_phys(data);
}

memset(data, 0, sizeof(struct DE_eckd_data));

@@ -398,7 +398,7 @@ static void locate_record_ext(struct ccw1 *ccw, struct LRE_eckd_data *data,
ccw->count = 22;
else
ccw->count = 20;
ccw->cda = (__u32)__pa(data);
ccw->cda = (__u32)virt_to_phys(data);
}

memset(data, 0, sizeof(*data));

@@ -544,11 +544,11 @@ static int prefix_LRE(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata,
ccw->flags = 0;
if (cmd == DASD_ECKD_CCW_WRITE_FULL_TRACK) {
ccw->count = sizeof(*pfxdata) + 2;
ccw->cda = (__u32) __pa(pfxdata);
ccw->cda = (__u32)virt_to_phys(pfxdata);
memset(pfxdata, 0, sizeof(*pfxdata) + 2);
} else {
ccw->count = sizeof(*pfxdata);
ccw->cda = (__u32) __pa(pfxdata);
ccw->cda = (__u32)virt_to_phys(pfxdata);
memset(pfxdata, 0, sizeof(*pfxdata));
}
@@ -615,7 +615,7 @@ locate_record(struct ccw1 *ccw, struct LO_eckd_data *data, unsigned int trk,
ccw->cmd_code = DASD_ECKD_CCW_LOCATE_RECORD;
ccw->flags = 0;
ccw->count = 16;
ccw->cda = (__u32) __pa(data);
ccw->cda = (__u32)virt_to_phys(data);

memset(data, 0, sizeof(struct LO_eckd_data));
sector = 0;

@@ -830,7 +830,7 @@ static void dasd_eckd_fill_rcd_cqr(struct dasd_device *device,
ccw = cqr->cpaddr;
ccw->cmd_code = DASD_ECKD_CCW_RCD;
ccw->flags = 0;
ccw->cda = (__u32)(addr_t)rcd_buffer;
ccw->cda = (__u32)virt_to_phys(rcd_buffer);
ccw->count = DASD_ECKD_RCD_DATA_SIZE;
cqr->magic = DASD_ECKD_MAGIC;

@@ -858,7 +858,7 @@ static void read_conf_cb(struct dasd_ccw_req *cqr, void *data)

if (cqr->status != DASD_CQR_DONE) {
ccw = cqr->cpaddr;
rcd_buffer = (__u8 *)((addr_t) ccw->cda);
rcd_buffer = phys_to_virt(ccw->cda);
memset(rcd_buffer, 0, sizeof(*rcd_buffer));

rcd_buffer[0] = 0xE5;

@@ -1547,7 +1547,7 @@ static int dasd_eckd_read_features(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->count = sizeof(struct dasd_psf_prssd_data);
ccw->flags |= CCW_FLAG_CC;
ccw->cda = (__u32)(addr_t) prssdp;
ccw->cda = (__u32)virt_to_phys(prssdp);

/* Read Subsystem Data - feature codes */
features = (struct dasd_rssd_features *) (prssdp + 1);

@@ -1556,7 +1556,7 @@ static int dasd_eckd_read_features(struct dasd_device *device)
ccw++;
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = sizeof(struct dasd_rssd_features);
ccw->cda = (__u32)(addr_t) features;
ccw->cda = (__u32)virt_to_phys(features);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;
@@ -1616,7 +1616,7 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->count = sizeof(*prssdp);
ccw->flags |= CCW_FLAG_CC;
ccw->cda = (__u32)(addr_t)prssdp;
ccw->cda = (__u32)virt_to_phys(prssdp);

/* Read Subsystem Data - Volume Storage Query */
vsq = (struct dasd_rssd_vsq *)(prssdp + 1);

@@ -1626,7 +1626,7 @@ static int dasd_eckd_read_vol_info(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = sizeof(*vsq);
ccw->flags |= CCW_FLAG_SLI;
ccw->cda = (__u32)(addr_t)vsq;
ccw->cda = (__u32)virt_to_phys(vsq);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;

@@ -1801,7 +1801,7 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->count = sizeof(*prssdp);
ccw->flags |= CCW_FLAG_CC;
ccw->cda = (__u32)(addr_t)prssdp;
ccw->cda = (__u32)virt_to_phys(prssdp);

lcq = (struct dasd_rssd_lcq *)(prssdp + 1);
memset(lcq, 0, sizeof(*lcq));

@@ -1810,7 +1810,7 @@ static int dasd_eckd_read_ext_pool_info(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = sizeof(*lcq);
ccw->flags |= CCW_FLAG_SLI;
ccw->cda = (__u32)(addr_t)lcq;
ccw->cda = (__u32)virt_to_phys(lcq);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;

@@ -1907,7 +1907,7 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device,
}
ccw = cqr->cpaddr;
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->cda = (__u32)(addr_t)psf_ssc_data;
ccw->cda = (__u32)virt_to_phys(psf_ssc_data);
ccw->count = 66;

cqr->startdev = device;
@@ -2262,7 +2262,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_READ_COUNT;
ccw->flags = 0;
ccw->count = 8;
ccw->cda = (__u32)(addr_t) count_data;
ccw->cda = (__u32)virt_to_phys(count_data);
ccw++;
count_data++;
}

@@ -2276,7 +2276,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_READ_COUNT;
ccw->flags = 0;
ccw->count = 8;
ccw->cda = (__u32)(addr_t) count_data;
ccw->cda = (__u32)virt_to_phys(count_data);

cqr->block = NULL;
cqr->startdev = device;

@@ -2647,7 +2647,7 @@ dasd_eckd_build_check(struct dasd_device *base, struct format_data_t *fdata,
ccw->cmd_code = DASD_ECKD_CCW_READ_COUNT;
ccw->flags = CCW_FLAG_SLI;
ccw->count = 8;
ccw->cda = (__u32)(addr_t) fmt_buffer;
ccw->cda = (__u32)virt_to_phys(fmt_buffer);
ccw++;
fmt_buffer++;
}

@@ -2857,7 +2857,7 @@ dasd_eckd_build_format(struct dasd_device *base, struct dasd_device *startdev,
ccw->cmd_code = DASD_ECKD_CCW_WRITE_RECORD_ZERO;
ccw->flags = CCW_FLAG_SLI;
ccw->count = 8;
ccw->cda = (__u32)(addr_t) ect;
ccw->cda = (__u32)virt_to_phys(ect);
ccw++;
}
if ((intensity & ~0x08) & 0x04) { /* erase track */

@@ -2872,7 +2872,7 @@ dasd_eckd_build_format(struct dasd_device *base, struct dasd_device *startdev,
ccw->cmd_code = DASD_ECKD_CCW_WRITE_CKD;
ccw->flags = CCW_FLAG_SLI;
ccw->count = 8;
ccw->cda = (__u32)(addr_t) ect;
ccw->cda = (__u32)virt_to_phys(ect);
} else { /* write remaining records */
for (i = 0; i < rpt; i++) {
ect = (struct eckd_count *) data;

@@ -2907,7 +2907,7 @@ dasd_eckd_build_format(struct dasd_device *base, struct dasd_device *startdev,
DASD_ECKD_CCW_WRITE_CKD_MT;
ccw->flags = CCW_FLAG_SLI;
ccw->count = 8;
ccw->cda = (__u32)(addr_t) ect;
ccw->cda = (__u32)virt_to_phys(ect);
ccw++;
}
}
@@ -3821,7 +3821,7 @@ dasd_eckd_dso_ras(struct dasd_device *device, struct dasd_block *block,
}

ccw = cqr->cpaddr;
ccw->cda = (__u32)(addr_t)cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);
ccw->cmd_code = DASD_ECKD_CCW_DSO;
ccw->count = size;

@@ -4090,11 +4090,11 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
ccw->cmd_code = rcmd;
ccw->count = count;
if (idal_is_needed(dst, blksize)) {
ccw->cda = (__u32)(addr_t) idaws;
ccw->cda = (__u32)virt_to_phys(idaws);
ccw->flags = CCW_FLAG_IDA;
idaws = idal_create_words(idaws, dst, blksize);
} else {
ccw->cda = (__u32)(addr_t) dst;
ccw->cda = (__u32)virt_to_phys(dst);
ccw->flags = 0;
}
ccw++;

@@ -4228,7 +4228,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
ccw[-1].flags |= CCW_FLAG_CC;
ccw->cmd_code = cmd;
ccw->count = len_to_track_end;
ccw->cda = (__u32)(addr_t)idaws;
ccw->cda = (__u32)virt_to_phys(idaws);
ccw->flags = CCW_FLAG_IDA;
ccw++;
recid += count;

@@ -4244,7 +4244,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
* idaw ends
*/
if (!idaw_dst) {
if (__pa(dst) & (IDA_BLOCK_SIZE-1)) {
if ((__u32)virt_to_phys(dst) & (IDA_BLOCK_SIZE - 1)) {
dasd_sfree_request(cqr, startdev);
return ERR_PTR(-ERANGE);
} else

@@ -4264,7 +4264,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
* idal_create_words will handle cases where idaw_len
* is larger then IDA_BLOCK_SIZE
*/
if (!(__pa(idaw_dst + idaw_len) & (IDA_BLOCK_SIZE-1)))
if (!((__u32)virt_to_phys(idaw_dst + idaw_len) & (IDA_BLOCK_SIZE - 1)))
end_idaw = 1;
/* We also need to end the idaw at track end */
if (!len_to_track_end) {
@@ -4817,7 +4817,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
ccw->count = 57326;
/* 64k map to one track */
len_to_track_end = 65536 - start_padding_sectors * 512;
ccw->cda = (__u32)(addr_t)idaws;
ccw->cda = (__u32)virt_to_phys(idaws);
ccw->flags |= CCW_FLAG_IDA;
ccw->flags |= CCW_FLAG_SLI;
ccw++;

@@ -4836,7 +4836,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
ccw->count = 57326;
/* 64k map to one track */
len_to_track_end = 65536;
ccw->cda = (__u32)(addr_t)idaws;
ccw->cda = (__u32)virt_to_phys(idaws);
ccw->flags |= CCW_FLAG_IDA;
ccw->flags |= CCW_FLAG_SLI;
ccw++;

@@ -4893,9 +4893,9 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req)
ccw++;
if (dst) {
if (ccw->flags & CCW_FLAG_IDA)
cda = *((char **)((addr_t) ccw->cda));
cda = *((char **)phys_to_virt(ccw->cda));
else
cda = (char *)((addr_t) ccw->cda);
cda = phys_to_virt(ccw->cda);
if (dst != cda) {
if (rq_data_dir(req) == READ)
memcpy(dst, cda, bv.bv_len);
@@ -5045,7 +5045,7 @@ dasd_eckd_release(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_RELEASE;
ccw->flags |= CCW_FLAG_SLI;
ccw->count = 32;
ccw->cda = (__u32)(addr_t) cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);
cqr->startdev = device;
cqr->memdev = device;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);

@@ -5100,7 +5100,7 @@ dasd_eckd_reserve(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_RESERVE;
ccw->flags |= CCW_FLAG_SLI;
ccw->count = 32;
ccw->cda = (__u32)(addr_t) cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);
cqr->startdev = device;
cqr->memdev = device;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);

@@ -5154,7 +5154,7 @@ dasd_eckd_steal_lock(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_SLCK;
ccw->flags |= CCW_FLAG_SLI;
ccw->count = 32;
ccw->cda = (__u32)(addr_t) cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);
cqr->startdev = device;
cqr->memdev = device;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);

@@ -5215,7 +5215,7 @@ static int dasd_eckd_snid(struct dasd_device *device,
ccw->cmd_code = DASD_ECKD_CCW_SNID;
ccw->flags |= CCW_FLAG_SLI;
ccw->count = 12;
ccw->cda = (__u32)(addr_t) cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);
cqr->startdev = device;
cqr->memdev = device;
clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
@@ -5282,7 +5282,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp)
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->count = sizeof(struct dasd_psf_prssd_data);
ccw->flags |= CCW_FLAG_CC;
ccw->cda = (__u32)(addr_t) prssdp;
ccw->cda = (__u32)virt_to_phys(prssdp);

/* Read Subsystem Data - Performance Statistics */
stats = (struct dasd_rssd_perf_stats_t *) (prssdp + 1);

@@ -5291,7 +5291,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp)
ccw++;
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = sizeof(struct dasd_rssd_perf_stats_t);
ccw->cda = (__u32)(addr_t) stats;
ccw->cda = (__u32)virt_to_phys(stats);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;

@@ -5435,7 +5435,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp)
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->count = usrparm.psf_data_len;
ccw->flags |= CCW_FLAG_CC;
ccw->cda = (__u32)(addr_t) psf_data;
ccw->cda = (__u32)virt_to_phys(psf_data);

ccw++;

@@ -5443,7 +5443,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp)
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = usrparm.rssd_result_len;
ccw->flags = CCW_FLAG_SLI ;
ccw->cda = (__u32)(addr_t) rssd_result;
ccw->cda = (__u32)virt_to_phys(rssd_result);

rc = dasd_sleep_on(cqr);
if (rc)
@@ -5512,9 +5512,9 @@ dasd_eckd_dump_ccw_range(struct ccw1 *from, struct ccw1 *to, char *page)

/* get pointer to data (consider IDALs) */
if (from->flags & CCW_FLAG_IDA)
datap = (char *) *((addr_t *) (addr_t) from->cda);
datap = (char *)*((addr_t *)phys_to_virt(from->cda));
else
datap = (char *) ((addr_t) from->cda);
datap = phys_to_virt(from->cda);

/* dump data (max 128 bytes) */
for (count = 0; count < from->count && count < 128; count++) {

@@ -5585,7 +5585,7 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
len += sprintf(page + len, PRINTK_HEADER
" device %s: Failing CCW: %p\n",
dev_name(&device->cdev->dev),
(void *) (addr_t) irb->scsw.cmd.cpa);
phys_to_virt(irb->scsw.cmd.cpa));
if (irb->esw.esw0.erw.cons) {
for (sl = 0; sl < 4; sl++) {
len += sprintf(page + len, PRINTK_HEADER

@@ -5632,8 +5632,7 @@ static void dasd_eckd_dump_sense_ccw(struct dasd_device *device,
/* print failing CCW area (maximum 4) */
/* scsw->cda is either valid or zero */
from = ++to;
fail = (struct ccw1 *)(addr_t)
irb->scsw.cmd.cpa; /* failing CCW */
fail = phys_to_virt(irb->scsw.cmd.cpa); /* failing CCW */
if (from < fail - 2) {
from = fail - 2; /* there is a gap - print header */
printk(KERN_ERR PRINTK_HEADER "......\n");

@@ -5687,13 +5686,12 @@ static void dasd_eckd_dump_sense_tcw(struct dasd_device *device,
len += sprintf(page + len, PRINTK_HEADER
" device %s: Failing TCW: %p\n",
dev_name(&device->cdev->dev),
(void *) (addr_t) irb->scsw.tm.tcw);
phys_to_virt(irb->scsw.tm.tcw));

tsb = NULL;
sense = NULL;
if (irb->scsw.tm.tcw && (irb->scsw.tm.fcxs & 0x01))
tsb = tcw_get_tsb(
(struct tcw *)(unsigned long)irb->scsw.tm.tcw);
tsb = tcw_get_tsb(phys_to_virt(irb->scsw.tm.tcw));

if (tsb) {
len += sprintf(page + len, PRINTK_HEADER
@@ -5917,7 +5915,7 @@ retry:
ccw->count = sizeof(struct dasd_psf_prssd_data);
ccw->flags |= CCW_FLAG_CC;
ccw->flags |= CCW_FLAG_SLI;
ccw->cda = (__u32)(addr_t) prssdp;
ccw->cda = (__u32)virt_to_phys(prssdp);

/* Read Subsystem Data - message buffer */
message_buf = (struct dasd_rssd_messages *) (prssdp + 1);

@@ -5927,7 +5925,7 @@ retry:
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = sizeof(struct dasd_rssd_messages);
ccw->flags |= CCW_FLAG_SLI;
ccw->cda = (__u32)(addr_t) message_buf;
ccw->cda = (__u32)virt_to_phys(message_buf);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;

@@ -6008,14 +6006,14 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
ccw->count = sizeof(struct dasd_psf_prssd_data);
ccw->flags |= CCW_FLAG_CC;
ccw->flags |= CCW_FLAG_SLI;
ccw->cda = (__u32)(addr_t) prssdp;
ccw->cda = (__u32)virt_to_phys(prssdp);

/* Read Subsystem Data - query host access */
ccw++;
ccw->cmd_code = DASD_ECKD_CCW_RSSD;
ccw->count = sizeof(struct dasd_psf_query_host_access);
ccw->flags |= CCW_FLAG_SLI;
ccw->cda = (__u32)(addr_t) host_access;
ccw->cda = (__u32)virt_to_phys(host_access);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;

@@ -6351,7 +6349,7 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response,
psf_cuir->ssid = device->path[pos].ssid;
ccw = cqr->cpaddr;
ccw->cmd_code = DASD_ECKD_CCW_PSF;
ccw->cda = (__u32)(addr_t)psf_cuir;
ccw->cda = (__u32)virt_to_phys(psf_cuir);
ccw->flags = CCW_FLAG_SLI;
ccw->count = sizeof(struct dasd_psf_cuir_response);
@@ -6956,8 +6954,10 @@ dasd_eckd_init(void)
return -ENOMEM;
dasd_vol_info_req = kmalloc(sizeof(*dasd_vol_info_req),
GFP_KERNEL | GFP_DMA);
if (!dasd_vol_info_req)
if (!dasd_vol_info_req) {
kfree(dasd_reserve_req);
return -ENOMEM;
}
pe_handler_worker = kmalloc(sizeof(*pe_handler_worker),
GFP_KERNEL | GFP_DMA);
if (!pe_handler_worker) {

@@ -491,7 +491,7 @@ int dasd_eer_enable(struct dasd_device *device)
ccw->cmd_code = DASD_ECKD_CCW_SNSS;
ccw->count = SNSS_DATA_SIZE;
ccw->flags = 0;
ccw->cda = (__u32)(addr_t) cqr->data;
ccw->cda = (__u32)virt_to_phys(cqr->data);

cqr->buildclk = get_tod_clock();
cqr->status = DASD_CQR_FILLED;
@@ -83,7 +83,7 @@ define_extent(struct ccw1 * ccw, struct DE_fba_data *data, int rw,
ccw->cmd_code = DASD_FBA_CCW_DEFINE_EXTENT;
ccw->flags = 0;
ccw->count = 16;
ccw->cda = (__u32) __pa(data);
ccw->cda = (__u32)virt_to_phys(data);
memset(data, 0, sizeof (struct DE_fba_data));
if (rw == WRITE)
(data->mask).perm = 0x0;

@@ -103,7 +103,7 @@ locate_record(struct ccw1 * ccw, struct LO_fba_data *data, int rw,
ccw->cmd_code = DASD_FBA_CCW_LOCATE;
ccw->flags = 0;
ccw->count = 8;
ccw->cda = (__u32) __pa(data);
ccw->cda = (__u32)virt_to_phys(data);
memset(data, 0, sizeof (struct LO_fba_data));
if (rw == WRITE)
data->operation.cmd = 0x5;

@@ -262,7 +262,7 @@ static void ccw_write_zero(struct ccw1 *ccw, int count)
ccw->cmd_code = DASD_FBA_CCW_WRITE;
ccw->flags |= CCW_FLAG_SLI;
ccw->count = count;
ccw->cda = (__u32) (addr_t) dasd_fba_zero_page;
ccw->cda = (__u32)virt_to_phys(dasd_fba_zero_page);
}

/*

@@ -528,11 +528,11 @@ static struct dasd_ccw_req *dasd_fba_build_cp_regular(
ccw->cmd_code = cmd;
ccw->count = block->bp_block;
if (idal_is_needed(dst, blksize)) {
ccw->cda = (__u32)(addr_t) idaws;
ccw->cda = (__u32)virt_to_phys(idaws);
ccw->flags = CCW_FLAG_IDA;
idaws = idal_create_words(idaws, dst, blksize);
} else {
ccw->cda = (__u32)(addr_t) dst;
ccw->cda = (__u32)virt_to_phys(dst);
ccw->flags = 0;
}
ccw++;

@@ -590,9 +590,9 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req)
ccw++;
if (dst) {
if (ccw->flags & CCW_FLAG_IDA)
cda = *((char **)((addr_t) ccw->cda));
cda = *((char **)phys_to_virt(ccw->cda));
else
cda = (char *)((addr_t) ccw->cda);
cda = phys_to_virt(ccw->cda);
if (dst != cda) {
if (rq_data_dir(req) == READ)
memcpy(dst, cda, bv.bv_len);
@@ -864,10 +864,6 @@ dcssblk_submit_bio(struct bio *bio)
unsigned long source_addr;
unsigned long bytes_done;

bio = bio_split_to_limits(bio);
if (!bio)
return;

bytes_done = 0;
dev_info = bio->bi_bdev->bd_disk->private_data;
if (dev_info == NULL)
@@ -831,6 +831,19 @@ static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
blk_queue_max_discard_sectors(q, max_blocks * (logical_block_size >> 9));
}

static void *sd_set_special_bvec(struct request *rq, unsigned int data_len)
{
struct page *page;

page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!page)
return NULL;
clear_highpage(page);
bvec_set_page(&rq->special_vec, page, data_len, 0);
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
return bvec_virt(&rq->special_vec);
}

static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
{
struct scsi_device *sdp = cmd->device;

@@ -841,19 +854,14 @@ static blk_status_t sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
unsigned int data_len = 24;
char *buf;

rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page)
buf = sd_set_special_bvec(rq, data_len);
if (!buf)
return BLK_STS_RESOURCE;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;

cmd->cmd_len = 10;
cmd->cmnd[0] = UNMAP;
cmd->cmnd[8] = 24;

buf = bvec_virt(&rq->special_vec);
put_unaligned_be16(6 + 16, &buf[0]);
put_unaligned_be16(16, &buf[2]);
put_unaligned_be64(lba, &buf[8]);

@@ -876,13 +884,8 @@ static blk_status_t sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd,
u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
u32 data_len = sdp->sector_size;

rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page)
if (!sd_set_special_bvec(rq, data_len))
return BLK_STS_RESOURCE;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;

cmd->cmd_len = 16;
cmd->cmnd[0] = WRITE_SAME_16;

@@ -908,13 +911,8 @@ static blk_status_t sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd,
u32 nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
u32 data_len = sdp->sector_size;

rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
if (!rq->special_vec.bv_page)
if (!sd_set_special_bvec(rq, data_len))
return BLK_STS_RESOURCE;
clear_highpage(rq->special_vec.bv_page);
rq->special_vec.bv_offset = 0;
rq->special_vec.bv_len = data_len;
rq->rq_flags |= RQF_SPECIAL_PAYLOAD;

cmd->cmd_len = 10;
cmd->cmnd[0] = WRITE_SAME;
@@ -281,10 +281,8 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;

for_each_sg(sgl, sg, sgl_nents, i) {
aio_cmd->bvecs[i].bv_page = sg_page(sg);
aio_cmd->bvecs[i].bv_len = sg->length;
aio_cmd->bvecs[i].bv_offset = sg->offset;

bvec_set_page(&aio_cmd->bvecs[i], sg_page(sg), sg->length,
sg->offset);
len += sg->length;
}

@@ -329,10 +327,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
}

for_each_sg(sgl, sg, sgl_nents, i) {
bvec[i].bv_page = sg_page(sg);
bvec[i].bv_len = sg->length;
bvec[i].bv_offset = sg->offset;

bvec_set_page(&bvec[i], sg_page(sg), sg->length, sg->offset);
len += sg->length;
}

@@ -465,10 +460,9 @@ fd_execute_write_same(struct se_cmd *cmd)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;

for (i = 0; i < nolb; i++) {
bvec[i].bv_page = sg_page(&cmd->t_data_sg[0]);
bvec[i].bv_len = cmd->t_data_sg[0].length;
bvec[i].bv_offset = cmd->t_data_sg[0].offset;

bvec_set_page(&bvec[i], sg_page(&cmd->t_data_sg[0]),
cmd->t_data_sg[0].length,
cmd->t_data_sg[0].offset);
len += se_dev->dev_attrib.block_size;
}
@@ -1126,9 +1126,8 @@ static int iotlb_translate(const struct vringh *vrh,
size = map->size - addr + map->start;
pa = map->addr + addr - map->start;
pfn = pa >> PAGE_SHIFT;
iov[ret].bv_page = pfn_to_page(pfn);
iov[ret].bv_len = min(len - s, size);
iov[ret].bv_offset = pa & (PAGE_SIZE - 1);
bvec_set_page(&iov[ret], pfn_to_page(pfn), min(len - s, size),
pa & (PAGE_SIZE - 1));
s += size;
addr += size;
++ret;
@@ -992,7 +992,7 @@ int afs_launder_folio(struct folio *folio)
{
struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
struct iov_iter iter;
struct bio_vec bv[1];
struct bio_vec bv;
unsigned long priv;
unsigned int f, t;
int ret = 0;

@@ -1008,10 +1008,8 @@ int afs_launder_folio(struct folio *folio)
t = afs_folio_dirty_to(folio, priv);
}

bv[0].bv_page = &folio->page;
bv[0].bv_offset = f;
bv[0].bv_len = t - f;
iov_iter_bvec(&iter, ITER_SOURCE, bv, 1, bv[0].bv_len);
bvec_set_folio(&bv, folio, t - f, f);
iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, bv.bv_len);

trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
@@ -103,14 +103,10 @@ static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
size += bytes;

for ( ; bytes; idx++, bvec_idx++) {
struct bio_vec bv = {
.bv_page = pages[idx],
.bv_len = min_t(int, bytes, PAGE_SIZE - start),
.bv_offset = start,
};
int len = min_t(int, bytes, PAGE_SIZE - start);

bvecs[bvec_idx] = bv;
bytes -= bv.bv_len;
bvec_set_page(&bvecs[bvec_idx], pages[idx], len, start);
bytes -= len;
start = 0;
}
}
@@ -759,8 +759,9 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
unsigned int page_offset, unsigned int to_read)
{
struct msghdr smb_msg = {};
struct bio_vec bv = {
.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
struct bio_vec bv;

bvec_set_page(&bv, page, to_read, page_offset);
iov_iter_bvec(&smb_msg.msg_iter, ITER_DEST, &bv, 1, to_read);
return cifs_readv_from_socket(server, &smb_msg);
}
@@ -143,14 +143,12 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page)
struct netfs_cache_resources cres;
struct fscache_cookie *cookie = cifs_inode_cookie(inode);
struct iov_iter iter;
struct bio_vec bvec[1];
struct bio_vec bvec;
int ret;

memset(&cres, 0, sizeof(cres));
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_DEST, &bvec, 1, PAGE_SIZE);

ret = fscache_begin_read_operation(&cres, cookie);
if (ret < 0)

@@ -171,16 +169,14 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page,
struct netfs_cache_resources cres;
struct fscache_cookie *cookie = cifs_inode_cookie(inode);
struct iov_iter iter;
struct bio_vec bvec[1];
struct bio_vec bvec;
loff_t start = page_offset(page);
size_t len = PAGE_SIZE;
int ret;

memset(&cres, 0, sizeof(cres));
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);

ret = fscache_begin_write_operation(&cres, cookie);
if (ret < 0)
@@ -1054,9 +1054,8 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)

for (i = 0; i < cur_npages; i++) {
len = rc > PAGE_SIZE ? PAGE_SIZE : rc;
bv[npages + i].bv_page = pages[i];
bv[npages + i].bv_offset = start;
bv[npages + i].bv_len = len - start;
bvec_set_page(&bv[npages + i], pages[i], len - start,
start);
rc -= len;
start = 0;
}
@@ -4598,9 +4598,9 @@ init_read_bvec(struct page **pages, unsigned int npages, unsigned int data_size,
return -ENOMEM;

for (i = 0; i < npages; i++) {
bvec[i].bv_page = pages[i];
bvec[i].bv_offset = (i == 0) ? cur_off : 0;
bvec[i].bv_len = min_t(unsigned int, PAGE_SIZE, data_size);
bvec_set_page(&bvec[i], pages[i],
min_t(unsigned int, PAGE_SIZE, data_size),
i == 0 ? cur_off : 0);
data_size -= bvec[i].bv_len;
}
@@ -864,11 +864,7 @@ EXPORT_SYMBOL(dump_skip);

#ifdef CONFIG_ELF_CORE
static int dump_emit_page(struct coredump_params *cprm, struct page *page)
{
struct bio_vec bvec = {
.bv_page = page,
.bv_offset = 0,
.bv_len = PAGE_SIZE,
};
struct bio_vec bvec;
struct iov_iter iter;
struct file *file = cprm->file;
loff_t pos;

@@ -884,6 +880,7 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
if (dump_interrupted())
return 0;
pos = file->f_pos;
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
n = __kernel_write_iter(cprm->file, &iter, &pos);
if (n != PAGE_SIZE)
@@ -245,14 +245,12 @@ static int fscache_fallback_read_page(struct inode *inode, struct page *page)
struct netfs_cache_resources cres;
struct fscache_cookie *cookie = nfs_i_fscache(inode);
struct iov_iter iter;
struct bio_vec bvec[1];
struct bio_vec bvec;
int ret;

memset(&cres, 0, sizeof(cres));
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
iov_iter_bvec(&iter, ITER_DEST, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_DEST, &bvec, 1, PAGE_SIZE);

ret = fscache_begin_read_operation(&cres, cookie);
if (ret < 0)

@@ -273,16 +271,14 @@ static int fscache_fallback_write_page(struct inode *inode, struct page *page,
struct netfs_cache_resources cres;
struct fscache_cookie *cookie = nfs_i_fscache(inode);
struct iov_iter iter;
struct bio_vec bvec[1];
struct bio_vec bvec;
loff_t start = page_offset(page);
size_t len = PAGE_SIZE;
int ret;

memset(&cres, 0, sizeof(cres));
bvec[0].bv_page = page;
bvec[0].bv_offset = 0;
bvec[0].bv_len = PAGE_SIZE;
iov_iter_bvec(&iter, ITER_SOURCE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);

ret = fscache_begin_write_operation(&cres, cookie);
if (ret < 0)
@@ -49,10 +49,8 @@ static int orangefs_writepage_locked(struct page *page,
/* Should've been handled in orangefs_invalidate_folio. */
WARN_ON(off == len || off + wlen > len);

bv.bv_page = page;
bv.bv_len = wlen;
bv.bv_offset = off % PAGE_SIZE;
WARN_ON(wlen == 0);
bvec_set_page(&bv, page, wlen, off % PAGE_SIZE);
iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, wlen);

ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,

@@ -102,15 +100,11 @@ static int orangefs_writepages_work(struct orangefs_writepages *ow,

for (i = 0; i < ow->npages; i++) {
set_page_writeback(ow->pages[i]);
ow->bv[i].bv_page = ow->pages[i];
ow->bv[i].bv_len = min(page_offset(ow->pages[i]) + PAGE_SIZE,
ow->off + ow->len) -
max(ow->off, page_offset(ow->pages[i]));
if (i == 0)
ow->bv[i].bv_offset = ow->off -
page_offset(ow->pages[i]);
else
ow->bv[i].bv_offset = 0;
bvec_set_page(&ow->bv[i], ow->pages[i],
min(page_offset(ow->pages[i]) + PAGE_SIZE,
ow->off + ow->len) -
max(ow->off, page_offset(ow->pages[i])),
i == 0 ? ow->off - page_offset(ow->pages[i]) : 0);
}
iov_iter_bvec(&iter, ITER_SOURCE, ow->bv, ow->npages, ow->len);

@@ -300,9 +294,7 @@ static int orangefs_read_folio(struct file *file, struct folio *folio)
orangefs_launder_folio(folio);

off = folio_pos(folio);
bv.bv_page = &folio->page;
bv.bv_len = folio_size(folio);
bv.bv_offset = 0;
bvec_set_folio(&bv, folio, folio_size(folio), 0);
iov_iter_bvec(&iter, ITER_DEST, &bv, 1, folio_size(folio));

ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
@@ -675,9 +675,8 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
goto done;
}

array[n].bv_page = buf->page;
array[n].bv_len = this_len;
array[n].bv_offset = buf->offset;
bvec_set_page(&array[n], buf->page, this_len,
buf->offset);
left -= this_len;
n++;
}
@@ -288,6 +288,7 @@ struct queue_limits {
unsigned int max_dev_sectors;
unsigned int chunk_sectors;
unsigned int max_sectors;
unsigned int max_user_sectors;
unsigned int max_segment_size;
unsigned int physical_block_size;
unsigned int logical_block_size;

@@ -484,6 +485,7 @@ struct request_queue {
DECLARE_BITMAP (blkcg_pols, BLKCG_MAX_POLS);
struct blkcg_gq *root_blkg;
struct list_head blkg_list;
struct mutex blkcg_mutex;
#endif

struct queue_limits limits;

@@ -1095,11 +1097,12 @@ static inline bool bdev_is_partition(struct block_device *bdev)
enum blk_default_limits {
BLK_MAX_SEGMENTS = 128,
BLK_SAFE_MAX_SECTORS = 255,
BLK_DEF_MAX_SECTORS = 2560,
BLK_MAX_SEGMENT_SIZE = 65536,
BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
};

#define BLK_DEF_MAX_SECTORS 2560u

static inline unsigned long queue_segment_boundary(const struct request_queue *q)
{
return q->limits.seg_boundary_mask;

@@ -1283,12 +1286,12 @@ static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev)

static inline bool bdev_is_zoned(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
return blk_queue_is_zoned(bdev_get_queue(bdev));
}

if (q)
return blk_queue_is_zoned(q);

return false;
static inline unsigned int bdev_zone_no(struct block_device *bdev, sector_t sec)
{
return disk_zone_no(bdev->bd_disk, sec);
}

static inline bool bdev_op_is_zoned_write(struct block_device *bdev,

@@ -1309,6 +1312,18 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
return q->limits.chunk_sectors;
}

static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev,
sector_t sector)
{
return sector & (bdev_zone_sectors(bdev) - 1);
}

static inline bool bdev_is_zone_start(struct block_device *bdev,
sector_t sector)
{
return bdev_offset_from_zone_start(bdev, sector) == 0;
}

static inline int queue_dma_alignment(const struct request_queue *q)
{
return q ? q->limits.dma_alignment : 511;
@@ -12,7 +12,6 @@
#include <linux/errno.h>
#include <linux/limits.h>
#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/types.h>

struct page;
@@ -35,6 +34,46 @@ struct bio_vec {
	unsigned int bv_offset;
};

/**
 * bvec_set_page - initialize a bvec based off a struct page
 * @bv: bvec to initialize
 * @page: page the bvec should point to
 * @len: length of the bvec
 * @offset: offset into the page
 */
static inline void bvec_set_page(struct bio_vec *bv, struct page *page,
		unsigned int len, unsigned int offset)
{
	bv->bv_page = page;
	bv->bv_len = len;
	bv->bv_offset = offset;
}

/**
 * bvec_set_folio - initialize a bvec based off a struct folio
 * @bv: bvec to initialize
 * @folio: folio the bvec should point to
 * @len: length of the bvec
 * @offset: offset into the folio
 */
static inline void bvec_set_folio(struct bio_vec *bv, struct folio *folio,
		unsigned int len, unsigned int offset)
{
	bvec_set_page(bv, &folio->page, len, offset);
}

/**
 * bvec_set_virt - initialize a bvec based on a virtual address
 * @bv: bvec to initialize
 * @vaddr: virtual address to set the bvec to
 * @len: length of the bvec
 */
static inline void bvec_set_virt(struct bio_vec *bv, void *vaddr,
		unsigned int len)
{
	bvec_set_page(bv, virt_to_page(vaddr), len, offset_in_page(vaddr));
}

struct bvec_iter {
	sector_t bi_sector;	/* device address in 512 byte
				   sectors */
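The three helpers above replace the open-coded bv_page/bv_len/bv_offset assignments that callers used to carry around (the io_uring hunk near the end of this diff is one such conversion). A minimal usage sketch, assuming kernel context with <linux/bvec.h> included; the function name and parameters are hypothetical:

/* Hypothetical caller: fills one bvec from a page and one from a
 * kernel virtual address using the helpers introduced above. */
static void example_fill_bvecs(struct bio_vec *bv, struct page *page,
			       unsigned int len, unsigned int off,
			       void *kbuf, unsigned int kbuf_len)
{
	/* was: bv[0].bv_page = page; bv[0].bv_len = len; bv[0].bv_offset = off; */
	bvec_set_page(&bv[0], page, len, off);

	/* virt_to_page()/offset_in_page() are handled by the helper */
	bvec_set_virt(&bv[1], kbuf, kbuf_len);
}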
@@ -38,13 +38,6 @@

#endif

extern const char *drbd_buildtag(void);
#define REL_VERSION "8.4.11"
#define API_VERSION 1
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 101

enum drbd_io_error_p {
	EP_PASS_ON, /* FIXME should the better be named "Ignore"? */
	EP_CALL_HELPER,
include/linux/drbd_config.h (new file, 16 lines)
@@ -0,0 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * drbd_config.h
 * DRBD's compile time configuration.
 */

#ifndef DRBD_CONFIG_H
#define DRBD_CONFIG_H

extern const char *drbd_buildtag(void);

#define REL_VERSION "8.4.11"
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 101

#endif
@@ -47,7 +47,7 @@ enum drbd_state_info_bcast_reason {
#undef linux

#include <linux/drbd.h>
#define GENL_MAGIC_VERSION API_VERSION
#define GENL_MAGIC_VERSION 1
#define GENL_MAGIC_FAMILY drbd
#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr)
#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
  drbd_limits.h
  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
@@ -16,123 +16,123 @@

#define DEBUG_RANGE_CHECK 0

#define DRBD_MINOR_COUNT_MIN 1
#define DRBD_MINOR_COUNT_MAX 255
#define DRBD_MINOR_COUNT_DEF 32
#define DRBD_MINOR_COUNT_MIN 1U
#define DRBD_MINOR_COUNT_MAX 255U
#define DRBD_MINOR_COUNT_DEF 32U
#define DRBD_MINOR_COUNT_SCALE '1'

#define DRBD_VOLUME_MAX 65535
#define DRBD_VOLUME_MAX 65534U

#define DRBD_DIALOG_REFRESH_MIN 0
#define DRBD_DIALOG_REFRESH_MAX 600
#define DRBD_DIALOG_REFRESH_MIN 0U
#define DRBD_DIALOG_REFRESH_MAX 600U
#define DRBD_DIALOG_REFRESH_SCALE '1'

/* valid port number */
#define DRBD_PORT_MIN 1
#define DRBD_PORT_MAX 0xffff
#define DRBD_PORT_MIN 1U
#define DRBD_PORT_MAX 0xffffU
#define DRBD_PORT_SCALE '1'

/* startup { */
/* if you want more than 3.4 days, disable */
#define DRBD_WFC_TIMEOUT_MIN 0
#define DRBD_WFC_TIMEOUT_MAX 300000
#define DRBD_WFC_TIMEOUT_DEF 0
#define DRBD_WFC_TIMEOUT_MIN 0U
#define DRBD_WFC_TIMEOUT_MAX 300000U
#define DRBD_WFC_TIMEOUT_DEF 0U
#define DRBD_WFC_TIMEOUT_SCALE '1'

#define DRBD_DEGR_WFC_TIMEOUT_MIN 0
#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
#define DRBD_DEGR_WFC_TIMEOUT_DEF 0
#define DRBD_DEGR_WFC_TIMEOUT_MIN 0U
#define DRBD_DEGR_WFC_TIMEOUT_MAX 300000U
#define DRBD_DEGR_WFC_TIMEOUT_DEF 0U
#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'

#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
#define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0U
#define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000U
#define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0U
#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
/* }*/

/* net { */
/* timeout, unit centi seconds
 * more than one minute timeout is not useful */
#define DRBD_TIMEOUT_MIN 1
#define DRBD_TIMEOUT_MAX 600
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
#define DRBD_TIMEOUT_MIN 1U
#define DRBD_TIMEOUT_MAX 600U
#define DRBD_TIMEOUT_DEF 60U /* 6 seconds */
#define DRBD_TIMEOUT_SCALE '1'

/* If backing disk takes longer than disk_timeout, mark the disk as failed */
#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
#define DRBD_DISK_TIMEOUT_MIN 0U /* 0 = disabled */
#define DRBD_DISK_TIMEOUT_MAX 6000U /* 10 Minutes */
#define DRBD_DISK_TIMEOUT_DEF 0U /* disabled */
#define DRBD_DISK_TIMEOUT_SCALE '1'

/* active connection retries when C_WF_CONNECTION */
#define DRBD_CONNECT_INT_MIN 1
#define DRBD_CONNECT_INT_MAX 120
#define DRBD_CONNECT_INT_DEF 10 /* seconds */
#define DRBD_CONNECT_INT_MIN 1U
#define DRBD_CONNECT_INT_MAX 120U
#define DRBD_CONNECT_INT_DEF 10U /* seconds */
#define DRBD_CONNECT_INT_SCALE '1'

/* keep-alive probes when idle */
#define DRBD_PING_INT_MIN 1
#define DRBD_PING_INT_MAX 120
#define DRBD_PING_INT_DEF 10
#define DRBD_PING_INT_MIN 1U
#define DRBD_PING_INT_MAX 120U
#define DRBD_PING_INT_DEF 10U
#define DRBD_PING_INT_SCALE '1'

/* timeout for the ping packets.*/
#define DRBD_PING_TIMEO_MIN 1
#define DRBD_PING_TIMEO_MAX 300
#define DRBD_PING_TIMEO_DEF 5
#define DRBD_PING_TIMEO_MIN 1U
#define DRBD_PING_TIMEO_MAX 300U
#define DRBD_PING_TIMEO_DEF 5U
#define DRBD_PING_TIMEO_SCALE '1'

/* max number of write requests between write barriers */
#define DRBD_MAX_EPOCH_SIZE_MIN 1
#define DRBD_MAX_EPOCH_SIZE_MAX 20000
#define DRBD_MAX_EPOCH_SIZE_DEF 2048
#define DRBD_MAX_EPOCH_SIZE_MIN 1U
#define DRBD_MAX_EPOCH_SIZE_MAX 20000U
#define DRBD_MAX_EPOCH_SIZE_DEF 2048U
#define DRBD_MAX_EPOCH_SIZE_SCALE '1'

/* I don't think that a tcp send buffer of more than 10M is useful */
#define DRBD_SNDBUF_SIZE_MIN 0
#define DRBD_SNDBUF_SIZE_MAX (10<<20)
#define DRBD_SNDBUF_SIZE_DEF 0
#define DRBD_SNDBUF_SIZE_MIN 0U
#define DRBD_SNDBUF_SIZE_MAX (10U<<20)
#define DRBD_SNDBUF_SIZE_DEF 0U
#define DRBD_SNDBUF_SIZE_SCALE '1'

#define DRBD_RCVBUF_SIZE_MIN 0
#define DRBD_RCVBUF_SIZE_MAX (10<<20)
#define DRBD_RCVBUF_SIZE_DEF 0
#define DRBD_RCVBUF_SIZE_MIN 0U
#define DRBD_RCVBUF_SIZE_MAX (10U<<20)
#define DRBD_RCVBUF_SIZE_DEF 0U
#define DRBD_RCVBUF_SIZE_SCALE '1'

/* @4k PageSize -> 128kB - 512MB */
#define DRBD_MAX_BUFFERS_MIN 32
#define DRBD_MAX_BUFFERS_MAX 131072
#define DRBD_MAX_BUFFERS_DEF 2048
#define DRBD_MAX_BUFFERS_MIN 32U
#define DRBD_MAX_BUFFERS_MAX 131072U
#define DRBD_MAX_BUFFERS_DEF 2048U
#define DRBD_MAX_BUFFERS_SCALE '1'

/* @4k PageSize -> 4kB - 512MB */
#define DRBD_UNPLUG_WATERMARK_MIN 1
#define DRBD_UNPLUG_WATERMARK_MAX 131072
#define DRBD_UNPLUG_WATERMARK_MIN 1U
#define DRBD_UNPLUG_WATERMARK_MAX 131072U
#define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
#define DRBD_UNPLUG_WATERMARK_SCALE '1'

/* 0 is disabled.
 * 200 should be more than enough even for very short timeouts */
#define DRBD_KO_COUNT_MIN 0
#define DRBD_KO_COUNT_MAX 200
#define DRBD_KO_COUNT_DEF 7
#define DRBD_KO_COUNT_MIN 0U
#define DRBD_KO_COUNT_MAX 200U
#define DRBD_KO_COUNT_DEF 7U
#define DRBD_KO_COUNT_SCALE '1'
/* } */

/* syncer { */
/* FIXME allow rate to be zero? */
#define DRBD_RESYNC_RATE_MIN 1
#define DRBD_RESYNC_RATE_MIN 1U
/* channel bonding 10 GbE, or other hardware */
#define DRBD_RESYNC_RATE_MAX (4 << 20)
#define DRBD_RESYNC_RATE_DEF 250
#define DRBD_RESYNC_RATE_DEF 250U
#define DRBD_RESYNC_RATE_SCALE 'k' /* kilobytes */

#define DRBD_AL_EXTENTS_MIN 67
#define DRBD_AL_EXTENTS_MIN 67U
/* we use u16 as "slot number", (u16)~0 is "FREE".
 * If you use >= 292 kB on-disk ring buffer,
 * this is the maximum you can use: */
#define DRBD_AL_EXTENTS_MAX 0xfffe
#define DRBD_AL_EXTENTS_DEF 1237
#define DRBD_AL_EXTENTS_MAX 0xfffeU
#define DRBD_AL_EXTENTS_DEF 1237U
#define DRBD_AL_EXTENTS_SCALE '1'

#define DRBD_MINOR_NUMBER_MIN -1
@@ -147,9 +147,9 @@
 * the upper limit with 64bit kernel, enough ram and flexible meta data
 * is 1 PiB, currently. */
/* DRBD_MAX_SECTORS */
#define DRBD_DISK_SIZE_MIN 0
#define DRBD_DISK_SIZE_MAX (1 * (2LLU << 40))
#define DRBD_DISK_SIZE_DEF 0 /* = disabled = no user size... */
#define DRBD_DISK_SIZE_MIN 0LLU
#define DRBD_DISK_SIZE_MAX (1LLU * (2LLU << 40))
#define DRBD_DISK_SIZE_DEF 0LLU /* = disabled = no user size... */
#define DRBD_DISK_SIZE_SCALE 's' /* sectors */

#define DRBD_ON_IO_ERROR_DEF EP_DETACH
@@ -162,39 +162,39 @@
#define DRBD_ON_CONGESTION_DEF OC_BLOCK
#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL

#define DRBD_MAX_BIO_BVECS_MIN 0
#define DRBD_MAX_BIO_BVECS_MAX 128
#define DRBD_MAX_BIO_BVECS_DEF 0
#define DRBD_MAX_BIO_BVECS_MIN 0U
#define DRBD_MAX_BIO_BVECS_MAX 128U
#define DRBD_MAX_BIO_BVECS_DEF 0U
#define DRBD_MAX_BIO_BVECS_SCALE '1'

#define DRBD_C_PLAN_AHEAD_MIN 0
#define DRBD_C_PLAN_AHEAD_MAX 300
#define DRBD_C_PLAN_AHEAD_DEF 20
#define DRBD_C_PLAN_AHEAD_MIN 0U
#define DRBD_C_PLAN_AHEAD_MAX 300U
#define DRBD_C_PLAN_AHEAD_DEF 20U
#define DRBD_C_PLAN_AHEAD_SCALE '1'

#define DRBD_C_DELAY_TARGET_MIN 1
#define DRBD_C_DELAY_TARGET_MAX 100
#define DRBD_C_DELAY_TARGET_DEF 10
#define DRBD_C_DELAY_TARGET_MIN 1U
#define DRBD_C_DELAY_TARGET_MAX 100U
#define DRBD_C_DELAY_TARGET_DEF 10U
#define DRBD_C_DELAY_TARGET_SCALE '1'

#define DRBD_C_FILL_TARGET_MIN 0
#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
#define DRBD_C_FILL_TARGET_MIN 0U
#define DRBD_C_FILL_TARGET_MAX (1U<<20) /* 500MByte in sec */
#define DRBD_C_FILL_TARGET_DEF 100U /* Try to place 50KiB in socket send buffer during resync */
#define DRBD_C_FILL_TARGET_SCALE 's' /* sectors */

#define DRBD_C_MAX_RATE_MIN 250
#define DRBD_C_MAX_RATE_MAX (4 << 20)
#define DRBD_C_MAX_RATE_DEF 102400
#define DRBD_C_MAX_RATE_MIN 250U
#define DRBD_C_MAX_RATE_MAX (4U << 20)
#define DRBD_C_MAX_RATE_DEF 102400U
#define DRBD_C_MAX_RATE_SCALE 'k' /* kilobytes */

#define DRBD_C_MIN_RATE_MIN 0
#define DRBD_C_MIN_RATE_MAX (4 << 20)
#define DRBD_C_MIN_RATE_DEF 250
#define DRBD_C_MIN_RATE_MIN 0U
#define DRBD_C_MIN_RATE_MAX (4U << 20)
#define DRBD_C_MIN_RATE_DEF 250U
#define DRBD_C_MIN_RATE_SCALE 'k' /* kilobytes */

#define DRBD_CONG_FILL_MIN 0
#define DRBD_CONG_FILL_MAX (10<<21) /* 10GByte in sectors */
#define DRBD_CONG_FILL_DEF 0
#define DRBD_CONG_FILL_MIN 0U
#define DRBD_CONG_FILL_MAX (10U<<21) /* 10GByte in sectors */
#define DRBD_CONG_FILL_DEF 0U
#define DRBD_CONG_FILL_SCALE 's' /* sectors */

#define DRBD_CONG_EXTENTS_MIN DRBD_AL_EXTENTS_MIN
@@ -204,48 +204,48 @@

#define DRBD_PROTOCOL_DEF DRBD_PROT_C

#define DRBD_DISK_BARRIER_DEF 0
#define DRBD_DISK_FLUSHES_DEF 1
#define DRBD_DISK_DRAIN_DEF 1
#define DRBD_MD_FLUSHES_DEF 1
#define DRBD_TCP_CORK_DEF 1
#define DRBD_AL_UPDATES_DEF 1
#define DRBD_DISK_BARRIER_DEF 0U
#define DRBD_DISK_FLUSHES_DEF 1U
#define DRBD_DISK_DRAIN_DEF 1U
#define DRBD_MD_FLUSHES_DEF 1U
#define DRBD_TCP_CORK_DEF 1U
#define DRBD_AL_UPDATES_DEF 1U

/* We used to ignore the discard_zeroes_data setting.
 * To not change established (and expected) behaviour,
 * by default assume that, for discard_zeroes_data=0,
 * we can make that an effective discard_zeroes_data=1,
 * if we only explicitly zero-out unaligned partial chunks. */
#define DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF 1
#define DRBD_DISCARD_ZEROES_IF_ALIGNED_DEF 1U

/* Some backends pretend to support WRITE SAME,
 * but fail such requests when they are actually submitted.
 * This is to tell DRBD to not even try. */
#define DRBD_DISABLE_WRITE_SAME_DEF 0
#define DRBD_DISABLE_WRITE_SAME_DEF 0U

#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
#define DRBD_ALWAYS_ASBP_DEF 0
#define DRBD_USE_RLE_DEF 1
#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0
#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0U
#define DRBD_ALWAYS_ASBP_DEF 0U
#define DRBD_USE_RLE_DEF 1U
#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0U

#define DRBD_AL_STRIPES_MIN 1
#define DRBD_AL_STRIPES_MAX 1024
#define DRBD_AL_STRIPES_DEF 1
#define DRBD_AL_STRIPES_MIN 1U
#define DRBD_AL_STRIPES_MAX 1024U
#define DRBD_AL_STRIPES_DEF 1U
#define DRBD_AL_STRIPES_SCALE '1'

#define DRBD_AL_STRIPE_SIZE_MIN 4
#define DRBD_AL_STRIPE_SIZE_MAX 16777216
#define DRBD_AL_STRIPE_SIZE_DEF 32
#define DRBD_AL_STRIPE_SIZE_MIN 4U
#define DRBD_AL_STRIPE_SIZE_MAX 16777216U
#define DRBD_AL_STRIPE_SIZE_DEF 32U
#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */

#define DRBD_SOCKET_CHECK_TIMEO_MIN 0
#define DRBD_SOCKET_CHECK_TIMEO_MIN 0U
#define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX
#define DRBD_SOCKET_CHECK_TIMEO_DEF 0
#define DRBD_SOCKET_CHECK_TIMEO_DEF 0U
#define DRBD_SOCKET_CHECK_TIMEO_SCALE '1'

#define DRBD_RS_DISCARD_GRANULARITY_MIN 0
#define DRBD_RS_DISCARD_GRANULARITY_MAX (1<<20) /* 1MiByte */
#define DRBD_RS_DISCARD_GRANULARITY_DEF 0 /* disabled by default */
#define DRBD_RS_DISCARD_GRANULARITY_MIN 0U
#define DRBD_RS_DISCARD_GRANULARITY_MAX (1U<<20) /* 1MiByte */
#define DRBD_RS_DISCARD_GRANULARITY_DEF 0U /* disabled by default */
#define DRBD_RS_DISCARD_GRANULARITY_SCALE '1' /* bytes */

#endif
@@ -1436,7 +1436,7 @@ struct task_struct {
#endif

#ifdef CONFIG_BLK_CGROUP
	struct request_queue *throttle_queue;
	struct gendisk *throttle_disk;
#endif

#ifdef CONFIG_UPROBES
@@ -19,6 +19,8 @@
#define UBLK_CMD_GET_PARAMS 0x09
#define UBLK_CMD_START_USER_RECOVERY 0x10
#define UBLK_CMD_END_USER_RECOVERY 0x11
#define UBLK_CMD_GET_DEV_INFO2 0x12

/*
 * IO commands, issued by ublk server, and handled by ublk driver.
 *
@@ -79,6 +81,27 @@

#define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4)

/*
 * Unprivileged user can create /dev/ublkcN and /dev/ublkbN.
 *
 * /dev/ublk-control needs to be available for unprivileged user, and it
 * can be done via udev rule to make all control commands available to
 * unprivileged user. Except for the command of UBLK_CMD_ADD_DEV, all
 * other commands are only allowed for the owner of the specified device.
 *
 * When userspace sends UBLK_CMD_ADD_DEV, the device pair's owner_uid and
 * owner_gid are stored to ublksrv_ctrl_dev_info by kernel, so far only
 * the current user's uid/gid is stored, that said owner of the created
 * device is always the current user.
 *
 * We still need udev rule to apply OWNER/GROUP with the stored owner_uid
 * and owner_gid.
 *
 * Then ublk server can be run as unprivileged user, and /dev/ublkbN can
 * be accessed and managed by its owner represented by owner_uid/owner_gid.
 */
#define UBLK_F_UNPRIVILEGED_DEV (1UL << 5)

/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -98,7 +121,15 @@ struct ublksrv_ctrl_cmd {
	__u64 addr;

	/* inline data */
	__u64 data[2];
	__u64 data[1];

	/*
	 * Used for UBLK_F_UNPRIVILEGED_DEV and UBLK_CMD_GET_DEV_INFO2
	 * only, include null char
	 */
	__u16 dev_path_len;
	__u16 pad;
	__u32 reserved;
};

struct ublksrv_ctrl_dev_info {
@@ -118,7 +149,8 @@ struct ublksrv_ctrl_dev_info {
	/* For ublksrv internal use, invisible to ublk driver */
	__u64 ublksrv_flags;

	__u64 reserved0;
	__u32 owner_uid; /* store by kernel */
	__u32 owner_gid; /* store by kernel */
	__u64 reserved1;
	__u64 reserved2;
};
@@ -214,6 +246,17 @@ struct ublk_param_discard {
	__u16 reserved0;
};

/*
 * read-only, can't set via UBLK_CMD_SET_PARAMS, disk_devt is available
 * after device is started
 */
struct ublk_param_devt {
	__u32 char_major;
	__u32 char_minor;
	__u32 disk_major;
	__u32 disk_minor;
};

struct ublk_params {
	/*
	 * Total length of parameters, userspace has to set 'len' for both
@@ -224,10 +267,12 @@ struct ublk_params {
	__u32 len;
#define UBLK_PARAM_TYPE_BASIC (1 << 0)
#define UBLK_PARAM_TYPE_DISCARD (1 << 1)
#define UBLK_PARAM_TYPE_DEVT (1 << 2)
	__u32 types; /* types of parameter included */

	struct ublk_param_basic basic;
	struct ublk_param_discard discard;
	struct ublk_param_devt devt;
};

#endif
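As the comment in struct ublk_params notes, userspace has to fill 'len' (and the 'types' bitmask) before issuing UBLK_CMD_SET_PARAMS or UBLK_CMD_GET_PARAMS. A minimal userspace sketch, assuming <linux/ublk_cmd.h> from these headers; the function name and the choice to enable only the basic parameter block are illustrative:

#include <string.h>
#include <linux/ublk_cmd.h>

/* Illustrative only: prepare a parameter block for UBLK_CMD_SET_PARAMS. */
static void example_prep_ublk_params(struct ublk_params *p)
{
	memset(p, 0, sizeof(*p));
	p->len = sizeof(*p);			/* required for SET/GET_PARAMS */
	p->types = UBLK_PARAM_TYPE_BASIC;	/* only p->basic is valid */
	/* a real ublk server would fill p->basic (and optionally p->discard)
	 * here; p->devt is read-only and reported by the kernel once the
	 * device has been started */
}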
@@ -1237,9 +1237,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
	size_t vec_len;

	vec_len = min_t(size_t, size, PAGE_SIZE - off);
	imu->bvec[i].bv_page = pages[i];
	imu->bvec[i].bv_len = vec_len;
	imu->bvec[i].bv_offset = off;
	bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
	off = 0;
	size -= vec_len;
}
@@ -1044,7 +1044,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif

#ifdef CONFIG_BLK_CGROUP
	tsk->throttle_queue = NULL;
	tsk->throttle_disk = NULL;
	tsk->use_memdelay = 0;
#endif

Some files were not shown because too many files have changed in this diff.