mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-04 04:02:26 +00:00
for-linus-20190715
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAl0s1ZEQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpiCEEACE9H/pXoegTTWIVPVajMlsa19UHIeilk4N GI7oKSiirQEMZnAOmrEzgB4/0zyYQsVypys0gZlYUD3GJVsXDT3zzjNXL5NpVg/O nqwSGWMHBSjWkLbaM40Pb2QLXsYgveptNL+9PtxrgtoYPoT5/+TyrJMFrRfi72EK WFeNDKOu6aJxpJ26JSsckJ0gluKeeEpRoEqsgHGIwaMIGHQf+b+ikk7tel5FAIgA uDwwD+Oxsdgh/ChsXL0d90GkcbcSp6GQ7GybxVmw/tPijx6mpeIY72xY3Zx+t8zF b71UNk6NmCKjOPO/6fiuYKKTYw+KhzlyEKO0j675HKfx2AhchEwKw0irp4yUlydA zxWYmz4U7iRgktJtymv3J4FEQQ3S6d1EnuQkQNX1LwiOsEsfzhkWi+7jy7KFhZoJ AqtYzqnOXvLx92q0vloj06HtK6zo+I/MINldy0+qn9lq0N0VF+dctyztAHLsF7P6 pUtS6i7l1JSFKAmMhC31sIj5TImaehM2e/TWMUPEDZaO96oKCmQwOF1oiloc6vlW h4xWsxP/9zOFcWNyPzy6Vo3JUXWRvFA7K+jV3Hsukw6rVHiNCGVYGSlTv8Roi5b7 I4ggu9R2JOGyku7UIlL50IRxEyjAp11LaO8yHhcCnRB65rmyBuNMQNcfOsfxpZ5Y 1mtSNhm5TQ== =g8xI -----END PGP SIGNATURE----- Merge tag 'for-linus-20190715' of git://git.kernel.dk/linux-block Pull more block updates from Jens Axboe: "A later pull request with some followup items. I had some vacation coming up to the merge window, so certain things items were delayed a bit. This pull request also contains fixes that came in within the last few days of the merge window, which I didn't want to push right before sending you a pull request. 
This contains: - NVMe pull request, mostly fixes, but also a few minor items on the feature side that were timing constrained (Christoph et al) - Report zones fixes (Damien) - Removal of dead code (Damien) - Turn on cgroup psi memstall (Josef) - block cgroup MAINTAINERS entry (Konstantin) - Flush init fix (Josef) - blk-throttle low iops timing fix (Konstantin) - nbd resize fixes (Mike) - nbd 0 blocksize crash fix (Xiubo) - block integrity error leak fix (Wenwen) - blk-cgroup writeback and priority inheritance fixes (Tejun)" * tag 'for-linus-20190715' of git://git.kernel.dk/linux-block: (42 commits) MAINTAINERS: add entry for block io cgroup null_blk: fixup ->report_zones() for !CONFIG_BLK_DEV_ZONED block: Limit zone array allocation size sd_zbc: Fix report zones buffer allocation block: Kill gfp_t argument of blkdev_report_zones() block: Allow mapping of vmalloc-ed buffers block/bio-integrity: fix a memory leak bug nvme: fix NULL deref for fabrics options nbd: add netlink reconfigure resize support nbd: fix crash when the blksize is zero block: Disable write plugging for zoned block devices block: Fix elevator name declaration block: Remove unused definitions nvme: fix regression upon hot device removal and insertion blk-throttle: fix zero wait time for iops throttled group block: Fix potential overflow in blk_report_zones() blkcg: implement REQ_CGROUP_PUNT blkcg, writeback: Implement wbc_blkcg_css() blkcg, writeback: Add wbc->no_cgroup_owner blkcg, writeback: Rename wbc_account_io() to wbc_account_cgroup_owner() ...
This commit is contained in:
commit
9637d51734
@ -2124,7 +2124,7 @@ following two functions.
|
||||
a queue (device) has been associated with the bio and
|
||||
before submission.
|
||||
|
||||
wbc_account_io(@wbc, @page, @bytes)
|
||||
wbc_account_cgroup_owner(@wbc, @page, @bytes)
|
||||
Should be called for each data segment being written out.
|
||||
While this function doesn't care exactly when it's called
|
||||
during the writeback session, it's the easiest and most
|
||||
|
@ -843,11 +843,6 @@ elevator_latter_req_fn These return the request before or after the
|
||||
|
||||
elevator_completed_req_fn called when a request is completed.
|
||||
|
||||
elevator_may_queue_fn returns true if the scheduler wants to allow the
|
||||
current context to queue a new request even if
|
||||
it is over the queue limit. This must be used
|
||||
very carefully!!
|
||||
|
||||
elevator_set_req_fn
|
||||
elevator_put_req_fn Must be used to allocate and free any elevator
|
||||
specific storage for a request.
|
||||
|
13
MAINTAINERS
13
MAINTAINERS
@ -4183,6 +4183,19 @@ S: Maintained
|
||||
F: mm/memcontrol.c
|
||||
F: mm/swap_cgroup.c
|
||||
|
||||
CONTROL GROUP - BLOCK IO CONTROLLER (BLKIO)
|
||||
M: Tejun Heo <tj@kernel.org>
|
||||
M: Jens Axboe <axboe@kernel.dk>
|
||||
L: cgroups@vger.kernel.org
|
||||
L: linux-block@vger.kernel.org
|
||||
T: git git://git.kernel.dk/linux-block
|
||||
F: Documentation/cgroup-v1/blkio-controller.rst
|
||||
F: block/blk-cgroup.c
|
||||
F: include/linux/blk-cgroup.h
|
||||
F: block/blk-throttle.c
|
||||
F: block/blk-iolatency.c
|
||||
F: block/bfq-cgroup.c
|
||||
|
||||
CORETEMP HARDWARE MONITORING DRIVER
|
||||
M: Fenghua Yu <fenghua.yu@intel.com>
|
||||
L: linux-hwmon@vger.kernel.org
|
||||
|
@ -276,8 +276,12 @@ bool bio_integrity_prep(struct bio *bio)
|
||||
ret = bio_integrity_add_page(bio, virt_to_page(buf),
|
||||
bytes, offset);
|
||||
|
||||
if (ret == 0)
|
||||
return false;
|
||||
if (ret == 0) {
|
||||
printk(KERN_ERR "could not attach integrity payload\n");
|
||||
kfree(buf);
|
||||
status = BLK_STS_RESOURCE;
|
||||
goto err_end_io;
|
||||
}
|
||||
|
||||
if (ret < bytes)
|
||||
break;
|
||||
|
28
block/bio.c
28
block/bio.c
@ -16,6 +16,7 @@
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/highmem.h>
|
||||
|
||||
#include <trace/events/block.h>
|
||||
#include "blk.h"
|
||||
@ -1441,8 +1442,22 @@ void bio_unmap_user(struct bio *bio)
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
 * For a non-write bio that carries an address in bi_private, sum the
 * lengths of all its bio_vecs and invalidate that many bytes of the
 * kernel vmap range starting at bi_private.  Compiles to an empty
 * function unless ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE is defined.
 */
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
	unsigned long total = 0;
	unsigned long idx;

	/* Nothing to do without a recorded address, or for writes. */
	if (!bio->bi_private || op_is_write(bio_op(bio)))
		return;

	for (idx = 0; idx < bio->bi_vcnt; idx++)
		total += bio->bi_io_vec[idx].bv_len;

	invalidate_kernel_vmap_range(bio->bi_private, total);
#endif
}
|
||||
|
||||
/*
 * Run the vmalloc cache invalidation helper on @bio and then drop one
 * reference to the bio with bio_put().
 */
static void bio_map_kern_endio(struct bio *bio)
|
||||
{
|
||||
bio_invalidate_vmalloc_pages(bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
@ -1463,6 +1478,8 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
unsigned long start = kaddr >> PAGE_SHIFT;
|
||||
const int nr_pages = end - start;
|
||||
bool is_vmalloc = is_vmalloc_addr(data);
|
||||
struct page *page;
|
||||
int offset, i;
|
||||
struct bio *bio;
|
||||
|
||||
@ -1470,6 +1487,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
if (!bio)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (is_vmalloc) {
|
||||
flush_kernel_vmap_range(data, len);
|
||||
bio->bi_private = data;
|
||||
}
|
||||
|
||||
offset = offset_in_page(kaddr);
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
unsigned int bytes = PAGE_SIZE - offset;
|
||||
@ -1480,7 +1502,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
|
||||
if (bytes > len)
|
||||
bytes = len;
|
||||
|
||||
if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
|
||||
if (!is_vmalloc)
|
||||
page = virt_to_page(data);
|
||||
else
|
||||
page = vmalloc_to_page(data);
|
||||
if (bio_add_pc_page(q, bio, page, bytes,
|
||||
offset) < bytes) {
|
||||
/* we don't support partial mappings */
|
||||
bio_put(bio);
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/tracehook.h>
|
||||
#include <linux/psi.h>
|
||||
#include "blk.h"
|
||||
|
||||
#define MAX_KEY_LEN 100
|
||||
@ -47,12 +48,14 @@ struct blkcg blkcg_root;
|
||||
EXPORT_SYMBOL_GPL(blkcg_root);
|
||||
|
||||
struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
|
||||
EXPORT_SYMBOL_GPL(blkcg_root_css);
|
||||
|
||||
static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
|
||||
|
||||
static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
|
||||
|
||||
static bool blkcg_debug_stats = false;
|
||||
static struct workqueue_struct *blkcg_punt_bio_wq;
|
||||
|
||||
static bool blkcg_policy_enabled(struct request_queue *q,
|
||||
const struct blkcg_policy *pol)
|
||||
@ -87,6 +90,8 @@ static void __blkg_release(struct rcu_head *rcu)
|
||||
{
|
||||
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
|
||||
|
||||
WARN_ON(!bio_list_empty(&blkg->async_bios));
|
||||
|
||||
/* release the blkcg and parent blkg refs this blkg has been holding */
|
||||
css_put(&blkg->blkcg->css);
|
||||
if (blkg->parent)
|
||||
@ -112,6 +117,23 @@ static void blkg_release(struct percpu_ref *ref)
|
||||
call_rcu(&blkg->rcu_head, __blkg_release);
|
||||
}
|
||||
|
||||
static void blkg_async_bio_workfn(struct work_struct *work)
|
||||
{
|
||||
struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
|
||||
async_bio_work);
|
||||
struct bio_list bios = BIO_EMPTY_LIST;
|
||||
struct bio *bio;
|
||||
|
||||
/* as long as there are pending bios, @blkg can't go away */
|
||||
spin_lock_bh(&blkg->async_bio_lock);
|
||||
bio_list_merge(&bios, &blkg->async_bios);
|
||||
bio_list_init(&blkg->async_bios);
|
||||
spin_unlock_bh(&blkg->async_bio_lock);
|
||||
|
||||
while ((bio = bio_list_pop(&bios)))
|
||||
submit_bio(bio);
|
||||
}
|
||||
|
||||
/**
|
||||
* blkg_alloc - allocate a blkg
|
||||
* @blkcg: block cgroup the new blkg is associated with
|
||||
@ -140,6 +162,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
|
||||
|
||||
blkg->q = q;
|
||||
INIT_LIST_HEAD(&blkg->q_node);
|
||||
spin_lock_init(&blkg->async_bio_lock);
|
||||
bio_list_init(&blkg->async_bios);
|
||||
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
|
||||
blkg->blkcg = blkcg;
|
||||
|
||||
for (i = 0; i < BLKCG_MAX_POLS; i++) {
|
||||
@ -1526,6 +1551,25 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
|
||||
|
||||
bool __blkcg_punt_bio_submit(struct bio *bio)
|
||||
{
|
||||
struct blkcg_gq *blkg = bio->bi_blkg;
|
||||
|
||||
/* consume the flag first */
|
||||
bio->bi_opf &= ~REQ_CGROUP_PUNT;
|
||||
|
||||
/* never bounce for the root cgroup */
|
||||
if (!blkg->parent)
|
||||
return false;
|
||||
|
||||
spin_lock_bh(&blkg->async_bio_lock);
|
||||
bio_list_add(&blkg->async_bios, bio);
|
||||
spin_unlock_bh(&blkg->async_bio_lock);
|
||||
|
||||
queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scale the accumulated delay based on how long it has been since we updated
|
||||
* the delay. We only call this when we are adding delay, in case it's been a
|
||||
@ -1587,6 +1631,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
|
||||
*/
|
||||
static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
|
||||
{
|
||||
unsigned long pflags;
|
||||
u64 now = ktime_to_ns(ktime_get());
|
||||
u64 exp;
|
||||
u64 delay_nsec = 0;
|
||||
@ -1613,11 +1658,8 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
|
||||
*/
|
||||
delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
|
||||
|
||||
/*
|
||||
* TODO: the use_memdelay flag is going to be for the upcoming psi stuff
|
||||
* that hasn't landed upstream yet. Once that stuff is in place we need
|
||||
* to do a psi_memstall_enter/leave if memdelay is set.
|
||||
*/
|
||||
if (use_memdelay)
|
||||
psi_memstall_enter(&pflags);
|
||||
|
||||
exp = ktime_add_ns(now, delay_nsec);
|
||||
tok = io_schedule_prepare();
|
||||
@ -1627,6 +1669,9 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
|
||||
break;
|
||||
} while (!fatal_signal_pending(current));
|
||||
io_schedule_finish(tok);
|
||||
|
||||
if (use_memdelay)
|
||||
psi_memstall_leave(&pflags);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1726,5 +1771,16 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
|
||||
atomic64_add(delta, &blkg->delay_nsec);
|
||||
}
|
||||
|
||||
/*
 * Allocate the "blkcg_punt_bio" workqueue and publish it through
 * blkcg_punt_bio_wq.  Returns 0 on success, -ENOMEM if the allocation
 * fails.
 */
static int __init blkcg_init(void)
{
	struct workqueue_struct *wq;

	wq = alloc_workqueue("blkcg_punt_bio",
			     WQ_MEM_RECLAIM | WQ_FREEZABLE |
			     WQ_UNBOUND | WQ_SYSFS, 0);
	blkcg_punt_bio_wq = wq;

	return wq ? 0 : -ENOMEM;
}
|
||||
subsys_initcall(blkcg_init);
|
||||
|
||||
module_param(blkcg_debug_stats, bool, 0644);
|
||||
MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
|
||||
|
@ -117,6 +117,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
|
||||
rq->internal_tag = -1;
|
||||
rq->start_time_ns = ktime_get_ns();
|
||||
rq->part = NULL;
|
||||
refcount_set(&rq->ref, 1);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_rq_init);
|
||||
|
||||
@ -687,7 +688,7 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
|
||||
struct request *rq;
|
||||
struct list_head *plug_list;
|
||||
|
||||
plug = current->plug;
|
||||
plug = blk_mq_plug(q, bio);
|
||||
if (!plug)
|
||||
return false;
|
||||
|
||||
@ -1127,6 +1128,9 @@ EXPORT_SYMBOL_GPL(direct_make_request);
|
||||
*/
|
||||
blk_qc_t submit_bio(struct bio *bio)
|
||||
{
|
||||
if (blkcg_punt_bio_submit(bio))
|
||||
return BLK_QC_T_NONE;
|
||||
|
||||
/*
|
||||
* If it's a regular read/write or a barrier with data attached,
|
||||
* go through the normal accounting stuff before submission.
|
||||
|
@ -1973,7 +1973,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
|
||||
|
||||
blk_mq_bio_to_request(rq, bio, nr_segs);
|
||||
|
||||
plug = current->plug;
|
||||
plug = blk_mq_plug(q, bio);
|
||||
if (unlikely(is_flush_fua)) {
|
||||
/* bypass scheduler for flush rq */
|
||||
blk_insert_flush(rq);
|
||||
|
@ -233,4 +233,36 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_queue_map *qmap)
|
||||
qmap->mq_map[cpu] = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* blk_mq_plug() - Get caller context plug
|
||||
* @q: request queue
|
||||
* @bio : the bio being submitted by the caller context
|
||||
*
|
||||
* Plugging, by design, may delay the insertion of BIOs into the elevator in
|
||||
* order to increase BIO merging opportunities. This however can cause BIO
|
||||
* insertion order to change from the order in which submit_bio() is being
|
||||
* executed in the case of multiple contexts concurrently issuing BIOs to a
|
||||
* device, even if these context are synchronized to tightly control BIO issuing
|
||||
* order. While this is not a problem with regular block devices, this ordering
|
||||
* change can cause write BIO failures with zoned block devices as these
|
||||
* require sequential write patterns to zones. Prevent this from happening by
|
||||
* ignoring the plug state of a BIO issuing context if the target request queue
|
||||
* is for a zoned block device and the BIO to plug is a write operation.
|
||||
*
|
||||
* Return current->plug if the bio can be plugged and NULL otherwise
|
||||
*/
|
||||
static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
|
||||
struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* For regular block devices or read operations, use the context plug
|
||||
* which may be NULL if blk_start_plug() was not executed.
|
||||
*/
|
||||
if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio)))
|
||||
return current->plug;
|
||||
|
||||
/* Zoned block device write operation case: do not plug the BIO */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -881,13 +881,10 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
|
||||
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
|
||||
u64 tmp;
|
||||
|
||||
jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
|
||||
jiffy_elapsed = jiffies - tg->slice_start[rw];
|
||||
|
||||
/* Slice has just started. Consider one slice interval */
|
||||
if (!jiffy_elapsed)
|
||||
jiffy_elapsed_rnd = tg->td->throtl_slice;
|
||||
|
||||
jiffy_elapsed_rnd = roundup(jiffy_elapsed_rnd, tg->td->throtl_slice);
|
||||
/* Round up to the next throttle slice, wait time must be nonzero */
|
||||
jiffy_elapsed_rnd = roundup(jiffy_elapsed + 1, tg->td->throtl_slice);
|
||||
|
||||
/*
|
||||
* jiffy_elapsed_rnd should not be a big value as minimum iops can be
|
||||
|
@ -14,6 +14,9 @@
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "blk.h"
|
||||
|
||||
@ -70,7 +73,7 @@ EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
|
||||
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
|
||||
sector_t nr_sectors)
|
||||
{
|
||||
unsigned long zone_sectors = blk_queue_zone_sectors(q);
|
||||
sector_t zone_sectors = blk_queue_zone_sectors(q);
|
||||
|
||||
return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
|
||||
}
|
||||
@ -117,8 +120,7 @@ static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
|
||||
}
|
||||
|
||||
static int blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask)
|
||||
struct blk_zone *zones, unsigned int *nr_zones)
|
||||
{
|
||||
struct request_queue *q = disk->queue;
|
||||
unsigned int z = 0, n, nrz = *nr_zones;
|
||||
@ -127,8 +129,7 @@ static int blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
|
||||
while (z < nrz && sector < capacity) {
|
||||
n = nrz - z;
|
||||
ret = disk->fops->report_zones(disk, sector, &zones[z], &n,
|
||||
gfp_mask);
|
||||
ret = disk->fops->report_zones(disk, sector, &zones[z], &n);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (!n)
|
||||
@ -149,17 +150,18 @@ static int blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
* @sector: Sector from which to report zones
|
||||
* @zones: Array of zone structures where to return the zones information
|
||||
* @nr_zones: Number of zone structures in the zone array
|
||||
* @gfp_mask: Memory allocation flags (for bio_alloc)
|
||||
*
|
||||
* Description:
|
||||
* Get zone information starting from the zone containing @sector.
|
||||
* The number of zone information reported may be less than the number
|
||||
* requested by @nr_zones. The number of zones actually reported is
|
||||
* returned in @nr_zones.
|
||||
* The caller must use memalloc_noXX_save/restore() calls to control
|
||||
* memory allocations done within this function (zone array and command
|
||||
* buffer allocation by the device driver).
|
||||
*/
|
||||
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask)
|
||||
struct blk_zone *zones, unsigned int *nr_zones)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
unsigned int i, nrz;
|
||||
@ -184,7 +186,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
|
||||
nrz = min(*nr_zones,
|
||||
__blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
|
||||
ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
|
||||
zones, &nrz, gfp_mask);
|
||||
zones, &nrz);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -305,9 +307,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
if (!zones)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = blkdev_report_zones(bdev, rep.sector,
|
||||
zones, &rep.nr_zones,
|
||||
GFP_KERNEL);
|
||||
ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -373,22 +373,25 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
|
||||
* Allocate an array of struct blk_zone to get nr_zones zone information.
|
||||
* The allocated array may be smaller than nr_zones.
|
||||
*/
|
||||
static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
|
||||
static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
|
||||
{
|
||||
size_t size = *nr_zones * sizeof(struct blk_zone);
|
||||
struct page *page;
|
||||
int order;
|
||||
struct blk_zone *zones;
|
||||
size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
|
||||
|
||||
for (order = get_order(size); order >= 0; order--) {
|
||||
page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
|
||||
if (page) {
|
||||
*nr_zones = min_t(unsigned int, *nr_zones,
|
||||
(PAGE_SIZE << order) / sizeof(struct blk_zone));
|
||||
return page_address(page);
|
||||
}
|
||||
/*
|
||||
* GFP_KERNEL here is meaningless as the caller task context has
|
||||
* the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
|
||||
* with memalloc_noio_save().
|
||||
*/
|
||||
zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
|
||||
if (!zones) {
|
||||
*nr_zones = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
*nr_zones = nrz;
|
||||
|
||||
return zones;
|
||||
}
|
||||
|
||||
void blk_queue_free_zone_bitmaps(struct request_queue *q)
|
||||
@ -415,6 +418,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
|
||||
unsigned int i, rep_nr_zones = 0, z = 0, nrz;
|
||||
struct blk_zone *zones = NULL;
|
||||
unsigned int noio_flag;
|
||||
sector_t sector = 0;
|
||||
int ret = 0;
|
||||
|
||||
@ -427,6 +431,12 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that all memory allocations in this context are done as
|
||||
* if GFP_NOIO was specified.
|
||||
*/
|
||||
noio_flag = memalloc_noio_save();
|
||||
|
||||
if (!blk_queue_is_zoned(q) || !nr_zones) {
|
||||
nr_zones = 0;
|
||||
goto update;
|
||||
@ -443,13 +453,13 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
|
||||
/* Get zone information and initialize seq_zones_bitmap */
|
||||
rep_nr_zones = nr_zones;
|
||||
zones = blk_alloc_zones(q->node, &rep_nr_zones);
|
||||
zones = blk_alloc_zones(&rep_nr_zones);
|
||||
if (!zones)
|
||||
goto out;
|
||||
|
||||
while (z < nr_zones) {
|
||||
nrz = min(nr_zones - z, rep_nr_zones);
|
||||
ret = blk_report_zones(disk, sector, zones, &nrz, GFP_NOIO);
|
||||
ret = blk_report_zones(disk, sector, zones, &nrz);
|
||||
if (ret)
|
||||
goto out;
|
||||
if (!nrz)
|
||||
@ -480,8 +490,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
out:
|
||||
free_pages((unsigned long)zones,
|
||||
get_order(rep_nr_zones * sizeof(struct blk_zone)));
|
||||
memalloc_noio_restore(noio_flag);
|
||||
|
||||
kvfree(zones);
|
||||
kfree(seq_zones_wlock);
|
||||
kfree(seq_zones_bitmap);
|
||||
|
||||
|
@ -134,6 +134,8 @@ static struct dentry *nbd_dbg_dir;
|
||||
|
||||
#define NBD_MAGIC 0x68797548
|
||||
|
||||
#define NBD_DEF_BLKSIZE 1024
|
||||
|
||||
static unsigned int nbds_max = 16;
|
||||
static int max_part = 16;
|
||||
static struct workqueue_struct *recv_workqueue;
|
||||
@ -1236,6 +1238,14 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
|
||||
nbd_config_put(nbd);
|
||||
}
|
||||
|
||||
static bool nbd_is_valid_blksize(unsigned long blksize)
|
||||
{
|
||||
if (!blksize || !is_power_of_2(blksize) || blksize < 512 ||
|
||||
blksize > PAGE_SIZE)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Must be called with config_lock held */
|
||||
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
unsigned int cmd, unsigned long arg)
|
||||
@ -1251,8 +1261,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
case NBD_SET_SOCK:
|
||||
return nbd_add_socket(nbd, arg, false);
|
||||
case NBD_SET_BLKSIZE:
|
||||
if (!arg || !is_power_of_2(arg) || arg < 512 ||
|
||||
arg > PAGE_SIZE)
|
||||
if (!arg)
|
||||
arg = NBD_DEF_BLKSIZE;
|
||||
if (!nbd_is_valid_blksize(arg))
|
||||
return -EINVAL;
|
||||
nbd_size_set(nbd, arg,
|
||||
div_s64(config->bytesize, arg));
|
||||
@ -1332,7 +1343,7 @@ static struct nbd_config *nbd_alloc_config(void)
|
||||
atomic_set(&config->recv_threads, 0);
|
||||
init_waitqueue_head(&config->recv_wq);
|
||||
init_waitqueue_head(&config->conn_wait);
|
||||
config->blksize = 1024;
|
||||
config->blksize = NBD_DEF_BLKSIZE;
|
||||
atomic_set(&config->live_connections, 0);
|
||||
try_module_get(THIS_MODULE);
|
||||
return config;
|
||||
@ -1673,6 +1684,30 @@ nbd_device_policy[NBD_DEVICE_ATTR_MAX + 1] = {
|
||||
[NBD_DEVICE_CONNECTED] = { .type = NLA_U8 },
|
||||
};
|
||||
|
||||
static int nbd_genl_size_set(struct genl_info *info, struct nbd_device *nbd)
|
||||
{
|
||||
struct nbd_config *config = nbd->config;
|
||||
u64 bsize = config->blksize;
|
||||
u64 bytes = config->bytesize;
|
||||
|
||||
if (info->attrs[NBD_ATTR_SIZE_BYTES])
|
||||
bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
|
||||
|
||||
if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
|
||||
bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
|
||||
if (!bsize)
|
||||
bsize = NBD_DEF_BLKSIZE;
|
||||
if (!nbd_is_valid_blksize(bsize)) {
|
||||
printk(KERN_ERR "Invalid block size %llu\n", bsize);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
if (bytes != config->bytesize || bsize != config->blksize)
|
||||
nbd_size_set(nbd, bsize, div64_u64(bytes, bsize));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
|
||||
{
|
||||
struct nbd_device *nbd = NULL;
|
||||
@ -1760,16 +1795,10 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
|
||||
refcount_set(&nbd->config_refs, 1);
|
||||
set_bit(NBD_BOUND, &config->runtime_flags);
|
||||
|
||||
if (info->attrs[NBD_ATTR_SIZE_BYTES]) {
|
||||
u64 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]);
|
||||
nbd_size_set(nbd, config->blksize,
|
||||
div64_u64(bytes, config->blksize));
|
||||
}
|
||||
if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) {
|
||||
u64 bsize =
|
||||
nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]);
|
||||
nbd_size_set(nbd, bsize, div64_u64(config->bytesize, bsize));
|
||||
}
|
||||
ret = nbd_genl_size_set(info, nbd);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (info->attrs[NBD_ATTR_TIMEOUT]) {
|
||||
u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
|
||||
nbd->tag_set.timeout = timeout * HZ;
|
||||
@ -1938,6 +1967,10 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = nbd_genl_size_set(info, nbd);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (info->attrs[NBD_ATTR_TIMEOUT]) {
|
||||
u64 timeout = nla_get_u64(info->attrs[NBD_ATTR_TIMEOUT]);
|
||||
nbd->tag_set.timeout = timeout * HZ;
|
||||
|
@ -89,8 +89,7 @@ struct nullb {
|
||||
int null_zone_init(struct nullb_device *dev);
|
||||
void null_zone_exit(struct nullb_device *dev);
|
||||
int null_zone_report(struct gendisk *disk, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask);
|
||||
struct blk_zone *zones, unsigned int *nr_zones);
|
||||
void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
unsigned int nr_sectors);
|
||||
void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
|
||||
@ -103,7 +102,7 @@ static inline int null_zone_init(struct nullb_device *dev)
|
||||
static inline void null_zone_exit(struct nullb_device *dev) {}
|
||||
static inline int null_zone_report(struct gendisk *disk, sector_t sector,
|
||||
struct blk_zone *zones,
|
||||
unsigned int *nr_zones, gfp_t gfp_mask)
|
||||
unsigned int *nr_zones)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
@ -67,8 +67,7 @@ void null_zone_exit(struct nullb_device *dev)
|
||||
}
|
||||
|
||||
int null_zone_report(struct gendisk *disk, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask)
|
||||
struct blk_zone *zones, unsigned int *nr_zones)
|
||||
{
|
||||
struct nullb *nullb = disk->private_data;
|
||||
struct nullb_device *dev = nullb->dev;
|
||||
|
@ -461,15 +461,14 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
static int flakey_report_zones(struct dm_target *ti, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask)
|
||||
struct blk_zone *zones, unsigned int *nr_zones)
|
||||
{
|
||||
struct flakey_c *fc = ti->private;
|
||||
int ret;
|
||||
|
||||
/* Do report and remap it */
|
||||
ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
|
||||
zones, nr_zones, gfp_mask);
|
||||
zones, nr_zones);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
|
@ -137,15 +137,14 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
static int linear_report_zones(struct dm_target *ti, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask)
|
||||
struct blk_zone *zones, unsigned int *nr_zones)
|
||||
{
|
||||
struct linear_c *lc = (struct linear_c *) ti->private;
|
||||
int ret;
|
||||
|
||||
/* Do report and remap it */
|
||||
ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
|
||||
zones, nr_zones, gfp_mask);
|
||||
zones, nr_zones);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#define DM_MSG_PREFIX "zoned metadata"
|
||||
|
||||
@ -1162,8 +1163,7 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
|
||||
while (sector < dev->capacity) {
|
||||
/* Get zone information */
|
||||
nr_blkz = DMZ_REPORT_NR_ZONES;
|
||||
ret = blkdev_report_zones(dev->bdev, sector, blkz,
|
||||
&nr_blkz, GFP_KERNEL);
|
||||
ret = blkdev_report_zones(dev->bdev, sector, blkz, &nr_blkz);
|
||||
if (ret) {
|
||||
dmz_dev_err(dev, "Report zones failed %d", ret);
|
||||
goto out;
|
||||
@ -1201,12 +1201,20 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
|
||||
static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
|
||||
{
|
||||
unsigned int nr_blkz = 1;
|
||||
unsigned int noio_flag;
|
||||
struct blk_zone blkz;
|
||||
int ret;
|
||||
|
||||
/* Get zone information from disk */
|
||||
/*
|
||||
* Get zone information from disk. Since blkdev_report_zones() uses
|
||||
* GFP_KERNEL by default for memory allocations, set the per-task
|
||||
* PF_MEMALLOC_NOIO flag so that all allocations are done as if
|
||||
* GFP_NOIO was specified.
|
||||
*/
|
||||
noio_flag = memalloc_noio_save();
|
||||
ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
|
||||
&blkz, &nr_blkz, GFP_NOIO);
|
||||
&blkz, &nr_blkz);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
if (!nr_blkz)
|
||||
ret = -EIO;
|
||||
if (ret) {
|
||||
|
@ -441,8 +441,7 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
}
|
||||
|
||||
static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask)
|
||||
struct blk_zone *zones, unsigned int *nr_zones)
|
||||
{
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
struct mapped_device *md = disk->private_data;
|
||||
@ -480,8 +479,7 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
|
||||
* So there is no need to loop here trying to fill the entire array
|
||||
* of zones.
|
||||
*/
|
||||
ret = tgt->type->report_zones(tgt, sector, zones,
|
||||
nr_zones, gfp_mask);
|
||||
ret = tgt->type->report_zones(tgt, sector, zones, nr_zones);
|
||||
|
||||
out:
|
||||
dm_put_live_table(md, srcu_idx);
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/types.h>
|
||||
@ -1626,6 +1627,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
|
||||
{
|
||||
sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
|
||||
unsigned short bs = 1 << ns->lba_shift;
|
||||
u32 atomic_bs, phys_bs, io_opt;
|
||||
|
||||
if (ns->lba_shift > PAGE_SHIFT) {
|
||||
/* unsupported block size, set capacity to 0 later */
|
||||
@ -1634,9 +1636,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
|
||||
blk_mq_freeze_queue(disk->queue);
|
||||
blk_integrity_unregister(disk);
|
||||
|
||||
if (id->nabo == 0) {
|
||||
/*
|
||||
* Bit 1 indicates whether NAWUPF is defined for this namespace
|
||||
* and whether it should be used instead of AWUPF. If NAWUPF ==
|
||||
* 0 then AWUPF must be used instead.
|
||||
*/
|
||||
if (id->nsfeat & (1 << 1) && id->nawupf)
|
||||
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
|
||||
else
|
||||
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
|
||||
} else {
|
||||
atomic_bs = bs;
|
||||
}
|
||||
phys_bs = bs;
|
||||
io_opt = bs;
|
||||
if (id->nsfeat & (1 << 4)) {
|
||||
/* NPWG = Namespace Preferred Write Granularity */
|
||||
phys_bs *= 1 + le16_to_cpu(id->npwg);
|
||||
/* NOWS = Namespace Optimal Write Size */
|
||||
io_opt *= 1 + le16_to_cpu(id->nows);
|
||||
}
|
||||
|
||||
blk_queue_logical_block_size(disk->queue, bs);
|
||||
blk_queue_physical_block_size(disk->queue, bs);
|
||||
blk_queue_io_min(disk->queue, bs);
|
||||
/*
|
||||
* Linux filesystems assume writing a single physical block is
|
||||
* an atomic operation. Hence limit the physical block size to the
|
||||
* value of the Atomic Write Unit Power Fail parameter.
|
||||
*/
|
||||
blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
|
||||
blk_queue_io_min(disk->queue, phys_bs);
|
||||
blk_queue_io_opt(disk->queue, io_opt);
|
||||
|
||||
if (ns->ms && !ns->ext &&
|
||||
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
|
||||
@ -2386,8 +2416,8 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
|
||||
lockdep_assert_held(&nvme_subsystems_lock);
|
||||
|
||||
list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) {
|
||||
if (ctrl->state == NVME_CTRL_DELETING ||
|
||||
ctrl->state == NVME_CTRL_DEAD)
|
||||
if (tmp->state == NVME_CTRL_DELETING ||
|
||||
tmp->state == NVME_CTRL_DEAD)
|
||||
continue;
|
||||
|
||||
if (tmp->cntlid == ctrl->cntlid) {
|
||||
@ -2433,6 +2463,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
|
||||
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
|
||||
subsys->vendor_id = le16_to_cpu(id->vid);
|
||||
subsys->cmic = id->cmic;
|
||||
subsys->awupf = le16_to_cpu(id->awupf);
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
subsys->iopolicy = NVME_IOPOLICY_NUMA;
|
||||
#endif
|
||||
@ -3274,6 +3305,10 @@ static int nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
||||
goto out_free_ns;
|
||||
}
|
||||
|
||||
if (ctrl->opts && ctrl->opts->data_digest)
|
||||
ns->queue->backing_dev_info->capabilities
|
||||
|= BDI_CAP_STABLE_WRITES;
|
||||
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
|
||||
if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
|
||||
blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
|
||||
|
@ -204,6 +204,9 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
|
||||
|
||||
static struct workqueue_struct *nvme_fc_wq;
|
||||
|
||||
static bool nvme_fc_waiting_to_unload;
|
||||
static DECLARE_COMPLETION(nvme_fc_unload_proceed);
|
||||
|
||||
/*
|
||||
* These items are short-term. They will eventually be moved into
|
||||
* a generic FC class. See comments in module init.
|
||||
@ -229,6 +232,8 @@ nvme_fc_free_lport(struct kref *ref)
|
||||
/* remove from transport list */
|
||||
spin_lock_irqsave(&nvme_fc_lock, flags);
|
||||
list_del(&lport->port_list);
|
||||
if (nvme_fc_waiting_to_unload && list_empty(&nvme_fc_lport_list))
|
||||
complete(&nvme_fc_unload_proceed);
|
||||
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
||||
|
||||
ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
|
||||
@ -3457,11 +3462,51 @@ static int __init nvme_fc_init_module(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
nvme_fc_delete_controllers(struct nvme_fc_rport *rport)
|
||||
{
|
||||
struct nvme_fc_ctrl *ctrl;
|
||||
|
||||
spin_lock(&rport->lock);
|
||||
list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
|
||||
dev_warn(ctrl->ctrl.device,
|
||||
"NVME-FC{%d}: transport unloading: deleting ctrl\n",
|
||||
ctrl->cnum);
|
||||
nvme_delete_ctrl(&ctrl->ctrl);
|
||||
}
|
||||
spin_unlock(&rport->lock);
|
||||
}
|
||||
|
||||
static void
|
||||
nvme_fc_cleanup_for_unload(void)
|
||||
{
|
||||
struct nvme_fc_lport *lport;
|
||||
struct nvme_fc_rport *rport;
|
||||
|
||||
list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
|
||||
list_for_each_entry(rport, &lport->endp_list, endp_list) {
|
||||
nvme_fc_delete_controllers(rport);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void __exit nvme_fc_exit_module(void)
|
||||
{
|
||||
/* sanity check - all lports should be removed */
|
||||
if (!list_empty(&nvme_fc_lport_list))
|
||||
pr_warn("%s: localport list not empty\n", __func__);
|
||||
unsigned long flags;
|
||||
bool need_cleanup = false;
|
||||
|
||||
spin_lock_irqsave(&nvme_fc_lock, flags);
|
||||
nvme_fc_waiting_to_unload = true;
|
||||
if (!list_empty(&nvme_fc_lport_list)) {
|
||||
need_cleanup = true;
|
||||
nvme_fc_cleanup_for_unload();
|
||||
}
|
||||
spin_unlock_irqrestore(&nvme_fc_lock, flags);
|
||||
if (need_cleanup) {
|
||||
pr_info("%s: waiting for ctlr deletes\n", __func__);
|
||||
wait_for_completion(&nvme_fc_unload_proceed);
|
||||
pr_info("%s: ctrl deletes complete\n", __func__);
|
||||
}
|
||||
|
||||
nvmf_unregister_transport(&nvme_fc_transport);
|
||||
|
||||
|
@ -123,14 +123,20 @@ void nvme_mpath_clear_current_path(struct nvme_ns *ns)
|
||||
}
|
||||
}
|
||||
|
||||
static bool nvme_path_is_disabled(struct nvme_ns *ns)
|
||||
{
|
||||
return ns->ctrl->state != NVME_CTRL_LIVE ||
|
||||
test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
|
||||
test_bit(NVME_NS_REMOVING, &ns->flags);
|
||||
}
|
||||
|
||||
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
|
||||
{
|
||||
int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
|
||||
struct nvme_ns *found = NULL, *fallback = NULL, *ns;
|
||||
|
||||
list_for_each_entry_rcu(ns, &head->list, siblings) {
|
||||
if (ns->ctrl->state != NVME_CTRL_LIVE ||
|
||||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
|
||||
if (nvme_path_is_disabled(ns))
|
||||
continue;
|
||||
|
||||
if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
|
||||
@ -178,14 +184,16 @@ static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head,
|
||||
{
|
||||
struct nvme_ns *ns, *found, *fallback = NULL;
|
||||
|
||||
if (list_is_singular(&head->list))
|
||||
if (list_is_singular(&head->list)) {
|
||||
if (nvme_path_is_disabled(old))
|
||||
return NULL;
|
||||
return old;
|
||||
}
|
||||
|
||||
for (ns = nvme_next_ns(head, old);
|
||||
ns != old;
|
||||
ns = nvme_next_ns(head, ns)) {
|
||||
if (ns->ctrl->state != NVME_CTRL_LIVE ||
|
||||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
|
||||
if (nvme_path_is_disabled(ns))
|
||||
continue;
|
||||
|
||||
if (ns->ana_state == NVME_ANA_OPTIMIZED) {
|
||||
|
@ -283,6 +283,7 @@ struct nvme_subsystem {
|
||||
char firmware_rev[8];
|
||||
u8 cmic;
|
||||
u16 vendor_id;
|
||||
u16 awupf; /* 0's based awupf value. */
|
||||
struct ida ns_ida;
|
||||
#ifdef CONFIG_NVME_MULTIPATH
|
||||
enum nvme_iopolicy iopolicy;
|
||||
|
@ -1439,11 +1439,15 @@ static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
|
||||
|
||||
if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
|
||||
nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
|
||||
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
|
||||
nvmeq->sq_cmds);
|
||||
if (nvmeq->sq_dma_addr) {
|
||||
set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
|
||||
return 0;
|
||||
if (nvmeq->sq_cmds) {
|
||||
nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
|
||||
nvmeq->sq_cmds);
|
||||
if (nvmeq->sq_dma_addr) {
|
||||
set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(depth));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2250,7 +2254,9 @@ static int nvme_dev_add(struct nvme_dev *dev)
|
||||
if (!dev->ctrl.tagset) {
|
||||
dev->tagset.ops = &nvme_mq_ops;
|
||||
dev->tagset.nr_hw_queues = dev->online_queues - 1;
|
||||
dev->tagset.nr_maps = 2; /* default + read */
|
||||
dev->tagset.nr_maps = 1; /* default */
|
||||
if (dev->io_queues[HCTX_TYPE_READ])
|
||||
dev->tagset.nr_maps++;
|
||||
if (dev->io_queues[HCTX_TYPE_POLL])
|
||||
dev->tagset.nr_maps++;
|
||||
dev->tagset.timeout = NVME_IO_TIMEOUT;
|
||||
@ -2289,8 +2295,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
|
||||
|
||||
pci_set_master(pdev);
|
||||
|
||||
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
|
||||
dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
|
||||
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)))
|
||||
goto disable;
|
||||
|
||||
if (readl(dev->bar + NVME_REG_CSTS) == -1) {
|
||||
@ -2498,7 +2503,8 @@ static void nvme_reset_work(struct work_struct *work)
|
||||
* Limit the max command size to prevent iod->sg allocations going
|
||||
* over a single page.
|
||||
*/
|
||||
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
|
||||
dev->ctrl.max_hw_sectors = min_t(u32,
|
||||
NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
|
||||
dev->ctrl.max_segments = NVME_MAX_SEGS;
|
||||
|
||||
/*
|
||||
@ -2923,7 +2929,7 @@ static int nvme_simple_resume(struct device *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct dev_pm_ops nvme_dev_pm_ops = {
|
||||
static const struct dev_pm_ops nvme_dev_pm_ops = {
|
||||
.suspend = nvme_suspend,
|
||||
.resume = nvme_resume,
|
||||
.freeze = nvme_simple_suspend,
|
||||
|
@ -860,7 +860,14 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
|
||||
else
|
||||
flags |= MSG_MORE;
|
||||
|
||||
ret = kernel_sendpage(queue->sock, page, offset, len, flags);
|
||||
/* can't zcopy slab pages */
|
||||
if (unlikely(PageSlab(page))) {
|
||||
ret = sock_no_sendpage(queue->sock, page, offset, len,
|
||||
flags);
|
||||
} else {
|
||||
ret = kernel_sendpage(queue->sock, page, offset, len,
|
||||
flags);
|
||||
}
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
|
@ -7,6 +7,17 @@
|
||||
#include <asm/unaligned.h>
|
||||
#include "trace.h"
|
||||
|
||||
static const char *nvme_trace_delete_sq(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u16 sqid = get_unaligned_le16(cdw10);
|
||||
|
||||
trace_seq_printf(p, "sqid=%u", sqid);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
@ -23,6 +34,17 @@ static const char *nvme_trace_create_sq(struct trace_seq *p, u8 *cdw10)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvme_trace_delete_cq(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
u16 cqid = get_unaligned_le16(cdw10);
|
||||
|
||||
trace_seq_printf(p, "cqid=%u", cqid);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const char *nvme_trace_create_cq(struct trace_seq *p, u8 *cdw10)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
@ -107,8 +129,12 @@ const char *nvme_trace_parse_admin_cmd(struct trace_seq *p,
|
||||
u8 opcode, u8 *cdw10)
|
||||
{
|
||||
switch (opcode) {
|
||||
case nvme_admin_delete_sq:
|
||||
return nvme_trace_delete_sq(p, cdw10);
|
||||
case nvme_admin_create_sq:
|
||||
return nvme_trace_create_sq(p, cdw10);
|
||||
case nvme_admin_delete_cq:
|
||||
return nvme_trace_delete_cq(p, cdw10);
|
||||
case nvme_admin_create_cq:
|
||||
return nvme_trace_create_cq(p, cdw10);
|
||||
case nvme_admin_identify:
|
||||
@ -178,7 +204,7 @@ static const char *nvme_trace_fabrics_common(struct trace_seq *p, u8 *spc)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
|
||||
trace_seq_printf(p, "spcecific=%*ph", 24, spc);
|
||||
trace_seq_printf(p, "specific=%*ph", 24, spc);
|
||||
trace_seq_putc(p, 0);
|
||||
return ret;
|
||||
}
|
||||
|
@ -442,6 +442,9 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
|
||||
break;
|
||||
}
|
||||
|
||||
if (ns->bdev)
|
||||
nvmet_bdev_set_limits(ns->bdev, id);
|
||||
|
||||
/*
|
||||
* We just provide a single LBA format that matches what the
|
||||
* underlying device reports.
|
||||
|
@ -588,8 +588,10 @@ static struct config_group *nvmet_ns_make(struct config_group *group,
|
||||
goto out;
|
||||
|
||||
ret = -EINVAL;
|
||||
if (nsid == 0 || nsid == NVME_NSID_ALL)
|
||||
if (nsid == 0 || nsid == NVME_NSID_ALL) {
|
||||
pr_err("invalid nsid %#x", nsid);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = -ENOMEM;
|
||||
ns = nvmet_ns_alloc(subsys, nsid);
|
||||
|
@ -434,7 +434,7 @@ fcloop_fcp_recv_work(struct work_struct *work)
|
||||
int ret = 0;
|
||||
bool aborted = false;
|
||||
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
switch (tfcp_req->inistate) {
|
||||
case INI_IO_START:
|
||||
tfcp_req->inistate = INI_IO_ACTIVE;
|
||||
@ -443,11 +443,11 @@ fcloop_fcp_recv_work(struct work_struct *work)
|
||||
aborted = true;
|
||||
break;
|
||||
default:
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
if (unlikely(aborted))
|
||||
ret = -ECANCELED;
|
||||
@ -469,7 +469,7 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
|
||||
struct nvmefc_fcp_req *fcpreq;
|
||||
bool completed = false;
|
||||
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
fcpreq = tfcp_req->fcpreq;
|
||||
switch (tfcp_req->inistate) {
|
||||
case INI_IO_ABORTED:
|
||||
@ -478,11 +478,11 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
|
||||
completed = true;
|
||||
break;
|
||||
default:
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
if (unlikely(completed)) {
|
||||
/* remove reference taken in original abort downcall */
|
||||
@ -494,9 +494,9 @@ fcloop_fcp_abort_recv_work(struct work_struct *work)
|
||||
nvmet_fc_rcv_fcp_abort(tfcp_req->tport->targetport,
|
||||
&tfcp_req->tgt_fcp_req);
|
||||
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
tfcp_req->fcpreq = NULL;
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
fcloop_call_host_done(fcpreq, tfcp_req, -ECANCELED);
|
||||
/* call_host_done releases reference for abort downcall */
|
||||
@ -513,10 +513,10 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work)
|
||||
container_of(work, struct fcloop_fcpreq, tio_done_work);
|
||||
struct nvmefc_fcp_req *fcpreq;
|
||||
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
fcpreq = tfcp_req->fcpreq;
|
||||
tfcp_req->inistate = INI_IO_COMPLETED;
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
fcloop_call_host_done(fcpreq, tfcp_req, tfcp_req->status);
|
||||
}
|
||||
@ -535,7 +535,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport,
|
||||
if (!rport->targetport)
|
||||
return -ECONNREFUSED;
|
||||
|
||||
tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL);
|
||||
tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_ATOMIC);
|
||||
if (!tfcp_req)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -621,12 +621,12 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
|
||||
int fcp_err = 0, active, aborted;
|
||||
u8 op = tgt_fcpreq->op;
|
||||
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
fcpreq = tfcp_req->fcpreq;
|
||||
active = tfcp_req->active;
|
||||
aborted = tfcp_req->aborted;
|
||||
tfcp_req->active = true;
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
if (unlikely(active))
|
||||
/* illegal - call while i/o active */
|
||||
@ -634,9 +634,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
|
||||
|
||||
if (unlikely(aborted)) {
|
||||
/* target transport has aborted i/o prior */
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
tfcp_req->active = false;
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
tgt_fcpreq->transferred_length = 0;
|
||||
tgt_fcpreq->fcp_error = -ECANCELED;
|
||||
tgt_fcpreq->done(tgt_fcpreq);
|
||||
@ -693,9 +693,9 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport,
|
||||
break;
|
||||
}
|
||||
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
tfcp_req->active = false;
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
tgt_fcpreq->transferred_length = xfrlen;
|
||||
tgt_fcpreq->fcp_error = fcp_err;
|
||||
@ -715,9 +715,9 @@ fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport,
|
||||
* (one doing io, other doing abort) and only kills ops posted
|
||||
* after the abort request
|
||||
*/
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
tfcp_req->aborted = true;
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
tfcp_req->status = NVME_SC_INTERNAL;
|
||||
|
||||
@ -765,7 +765,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
|
||||
return;
|
||||
|
||||
/* break initiator/target relationship for io */
|
||||
spin_lock(&tfcp_req->reqlock);
|
||||
spin_lock_irq(&tfcp_req->reqlock);
|
||||
switch (tfcp_req->inistate) {
|
||||
case INI_IO_START:
|
||||
case INI_IO_ACTIVE:
|
||||
@ -775,11 +775,11 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport,
|
||||
abortio = false;
|
||||
break;
|
||||
default:
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&tfcp_req->reqlock);
|
||||
spin_unlock_irq(&tfcp_req->reqlock);
|
||||
|
||||
if (abortio)
|
||||
/* leave the reference while the work item is scheduled */
|
||||
|
@ -8,6 +8,45 @@
|
||||
#include <linux/module.h>
|
||||
#include "nvmet.h"
|
||||
|
||||
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
|
||||
{
|
||||
const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
|
||||
/* Number of physical blocks per logical block. */
|
||||
const u32 ppl = ql->physical_block_size / ql->logical_block_size;
|
||||
/* Physical blocks per logical block, 0's based. */
|
||||
const __le16 ppl0b = to0based(ppl);
|
||||
|
||||
/*
|
||||
* For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
|
||||
* NAWUPF, and NACWU are defined for this namespace and should be
|
||||
* used by the host for this namespace instead of the AWUN, AWUPF,
|
||||
* and ACWU fields in the Identify Controller data structure. If
|
||||
* any of these fields are zero that means that the corresponding
|
||||
* field from the identify controller data structure should be used.
|
||||
*/
|
||||
id->nsfeat |= 1 << 1;
|
||||
id->nawun = ppl0b;
|
||||
id->nawupf = ppl0b;
|
||||
id->nacwu = ppl0b;
|
||||
|
||||
/*
|
||||
* Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
|
||||
* NOWS are defined for this namespace and should be used by
|
||||
* the host for I/O optimization.
|
||||
*/
|
||||
id->nsfeat |= 1 << 4;
|
||||
/* NPWG = Namespace Preferred Write Granularity. 0's based */
|
||||
id->npwg = ppl0b;
|
||||
/* NPWA = Namespace Preferred Write Alignment. 0's based */
|
||||
id->npwa = id->npwg;
|
||||
/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
|
||||
id->npdg = to0based(ql->discard_granularity / ql->logical_block_size);
|
||||
/* NPDA = Namespace Preferred Deallocate Alignment */
|
||||
id->npda = id->npdg;
|
||||
/* NOWS = Namespace Optimal Write Size */
|
||||
id->nows = to0based(ql->io_opt / ql->logical_block_size);
|
||||
}
|
||||
|
||||
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
|
||||
{
|
||||
int ret;
|
||||
|
@ -365,6 +365,7 @@ u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
|
||||
void nvmet_execute_async_event(struct nvmet_req *req);
|
||||
|
||||
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
|
||||
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
|
||||
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
|
||||
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
|
||||
@ -492,4 +493,11 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req)
|
||||
}
|
||||
|
||||
u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
|
||||
|
||||
/* Convert a 32-bit number to a 16-bit 0's based number */
|
||||
static inline __le16 to0based(u32 a)
|
||||
{
|
||||
return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
|
||||
}
|
||||
|
||||
#endif /* _NVMET_H */
|
||||
|
@ -146,7 +146,7 @@ static const char *nvmet_trace_fabrics_common(struct trace_seq *p, u8 *spc)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
|
||||
trace_seq_printf(p, "spcecific=%*ph", 24, spc);
|
||||
trace_seq_printf(p, "specific=%*ph", 24, spc);
|
||||
trace_seq_putc(p, 0);
|
||||
return ret;
|
||||
}
|
||||
|
@ -213,8 +213,7 @@ extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
|
||||
extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
|
||||
struct scsi_sense_hdr *sshdr);
|
||||
extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask);
|
||||
struct blk_zone *zones, unsigned int *nr_zones);
|
||||
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
|
||||
|
@ -9,6 +9,8 @@
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
@ -50,7 +52,7 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
|
||||
/**
|
||||
* sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
|
||||
* @sdkp: The target disk
|
||||
* @buf: Buffer to use for the reply
|
||||
* @buf: vmalloc-ed buffer to use for the reply
|
||||
* @buflen: the buffer size
|
||||
* @lba: Start LBA of the report
|
||||
* @partial: Do partial report
|
||||
@ -79,7 +81,6 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
|
||||
put_unaligned_be32(buflen, &cmd[10]);
|
||||
if (partial)
|
||||
cmd[14] = ZBC_REPORT_ZONE_PARTIAL;
|
||||
memset(buf, 0, buflen);
|
||||
|
||||
result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
|
||||
buf, buflen, &sshdr,
|
||||
@ -103,45 +104,83 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximum number of zones to get with one report zones command.
|
||||
*/
|
||||
#define SD_ZBC_REPORT_MAX_ZONES 8192U
|
||||
|
||||
/**
|
||||
* sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
|
||||
* @sdkp: The target disk
|
||||
* @nr_zones: Maximum number of zones to report
|
||||
* @buflen: Size of the buffer allocated
|
||||
*
|
||||
* Try to allocate a reply buffer for the number of requested zones.
|
||||
* The size of the buffer allocated may be smaller than requested to
|
||||
* satisfy the device constraints (max_hw_sectors, max_segments, etc).
|
||||
*
|
||||
* Return the address of the allocated buffer and update @buflen with
|
||||
* the size of the allocated buffer.
|
||||
*/
|
||||
static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
|
||||
unsigned int nr_zones, size_t *buflen)
|
||||
{
|
||||
struct request_queue *q = sdkp->disk->queue;
|
||||
size_t bufsize;
|
||||
void *buf;
|
||||
|
||||
/*
|
||||
* Report zone buffer size should be at most 64B times the number of
|
||||
* zones requested plus the 64B reply header, but should be at least
|
||||
* SECTOR_SIZE for ATA devices.
|
||||
* Make sure that this size does not exceed the hardware capabilities.
|
||||
* Furthermore, since the report zone command cannot be split, make
|
||||
* sure that the allocated buffer can always be mapped by limiting the
|
||||
* number of pages allocated to the HBA max segments limit.
|
||||
*/
|
||||
nr_zones = min(nr_zones, SD_ZBC_REPORT_MAX_ZONES);
|
||||
bufsize = roundup((nr_zones + 1) * 64, 512);
|
||||
bufsize = min_t(size_t, bufsize,
|
||||
queue_max_hw_sectors(q) << SECTOR_SHIFT);
|
||||
bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
|
||||
|
||||
buf = vzalloc(bufsize);
|
||||
if (buf)
|
||||
*buflen = bufsize;
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
|
||||
* sd_zbc_report_zones - Disk report zones operation.
|
||||
* @disk: The target disk
|
||||
* @sector: Start 512B sector of the report
|
||||
* @zones: Array of zone descriptors
|
||||
* @nr_zones: Number of descriptors in the array
|
||||
* @gfp_mask: Memory allocation mask
|
||||
*
|
||||
* Execute a report zones command on the target disk.
|
||||
*/
|
||||
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask)
|
||||
struct blk_zone *zones, unsigned int *nr_zones)
|
||||
{
|
||||
struct scsi_disk *sdkp = scsi_disk(disk);
|
||||
unsigned int i, buflen, nrz = *nr_zones;
|
||||
unsigned int i, nrz = *nr_zones;
|
||||
unsigned char *buf;
|
||||
size_t offset = 0;
|
||||
size_t buflen = 0, offset = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (!sd_is_zoned(sdkp))
|
||||
/* Not a zoned device */
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/*
|
||||
* Get a reply buffer for the number of requested zones plus a header,
|
||||
* without exceeding the device maximum command size. For ATA disks,
|
||||
* buffers must be aligned to 512B.
|
||||
*/
|
||||
buflen = min(queue_max_hw_sectors(disk->queue) << 9,
|
||||
roundup((nrz + 1) * 64, 512));
|
||||
buf = kmalloc(buflen, gfp_mask);
|
||||
buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
|
||||
sectors_to_logical(sdkp->device, sector), true);
|
||||
if (ret)
|
||||
goto out_free_buf;
|
||||
goto out;
|
||||
|
||||
nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
|
||||
for (i = 0; i < nrz; i++) {
|
||||
@ -152,8 +191,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
|
||||
|
||||
*nr_zones = nrz;
|
||||
|
||||
out_free_buf:
|
||||
kfree(buf);
|
||||
out:
|
||||
kvfree(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -287,8 +326,6 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define SD_ZBC_BUF_SIZE 131072U
|
||||
|
||||
/**
|
||||
* sd_zbc_check_zones - Check the device capacity and zone sizes
|
||||
* @sdkp: Target disk
|
||||
@ -304,22 +341,28 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
|
||||
*/
|
||||
static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
|
||||
{
|
||||
size_t bufsize, buflen;
|
||||
unsigned int noio_flag;
|
||||
u64 zone_blocks = 0;
|
||||
sector_t max_lba, block = 0;
|
||||
unsigned char *buf;
|
||||
unsigned char *rec;
|
||||
unsigned int buf_len;
|
||||
unsigned int list_length;
|
||||
int ret;
|
||||
u8 same;
|
||||
|
||||
/* Do all memory allocations as if GFP_NOIO was specified */
|
||||
noio_flag = memalloc_noio_save();
|
||||
|
||||
/* Get a buffer */
|
||||
buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
buf = sd_zbc_alloc_report_buffer(sdkp, SD_ZBC_REPORT_MAX_ZONES,
|
||||
&bufsize);
|
||||
if (!buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Do a report zone to get max_lba and the same field */
|
||||
ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false);
|
||||
ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, 0, false);
|
||||
if (ret)
|
||||
goto out_free;
|
||||
|
||||
@ -355,12 +398,12 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
|
||||
do {
|
||||
|
||||
/* Parse REPORT ZONES header */
|
||||
list_length = get_unaligned_be32(&buf[0]) + 64;
|
||||
buflen = min_t(size_t, get_unaligned_be32(&buf[0]) + 64,
|
||||
bufsize);
|
||||
rec = buf + 64;
|
||||
buf_len = min(list_length, SD_ZBC_BUF_SIZE);
|
||||
|
||||
/* Parse zone descriptors */
|
||||
while (rec < buf + buf_len) {
|
||||
while (rec < buf + buflen) {
|
||||
u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
|
||||
|
||||
if (zone_blocks == 0) {
|
||||
@ -376,8 +419,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
|
||||
}
|
||||
|
||||
if (block < sdkp->capacity) {
|
||||
ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
|
||||
block, true);
|
||||
ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, block,
|
||||
true);
|
||||
if (ret)
|
||||
goto out_free;
|
||||
}
|
||||
@ -408,7 +451,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
|
||||
}
|
||||
|
||||
out_free:
|
||||
kfree(buf);
|
||||
memalloc_noio_restore(noio_flag);
|
||||
kvfree(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -2911,7 +2911,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
bio = NULL;
|
||||
} else {
|
||||
if (wbc)
|
||||
wbc_account_io(wbc, page, page_size);
|
||||
wbc_account_cgroup_owner(wbc, page, page_size);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -2924,7 +2924,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
bio->bi_opf = opf;
|
||||
if (wbc) {
|
||||
wbc_init_bio(wbc, bio);
|
||||
wbc_account_io(wbc, page, page_size);
|
||||
wbc_account_cgroup_owner(wbc, page, page_size);
|
||||
}
|
||||
|
||||
*bio_ret = bio;
|
||||
|
@ -3089,7 +3089,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
|
||||
|
||||
if (wbc) {
|
||||
wbc_init_bio(wbc, bio);
|
||||
wbc_account_io(wbc, bh->b_page, bh->b_size);
|
||||
wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
|
||||
}
|
||||
|
||||
submit_bio(bio);
|
||||
|
@ -396,7 +396,7 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
|
||||
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
|
||||
if (ret != bh->b_size)
|
||||
goto submit_and_retry;
|
||||
wbc_account_io(io->io_wbc, page, bh->b_size);
|
||||
wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
|
||||
io->io_next_block++;
|
||||
return 0;
|
||||
}
|
||||
|
@ -470,7 +470,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
|
||||
}
|
||||
|
||||
if (fio->io_wbc && !is_read_io(fio->op))
|
||||
wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
|
||||
wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
|
||||
|
||||
bio_set_op_attrs(bio, fio->op, fio->op_flags);
|
||||
|
||||
@ -513,7 +513,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
|
||||
}
|
||||
|
||||
if (fio->io_wbc)
|
||||
wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
|
||||
wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
|
||||
|
||||
inc_page_count(fio->sbi, WB_DATA_TYPE(page));
|
||||
|
||||
@ -592,7 +592,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
|
||||
}
|
||||
|
||||
if (fio->io_wbc)
|
||||
wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
|
||||
wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
|
||||
|
||||
io->last_block_in_bio = fio->new_blkaddr;
|
||||
f2fs_trace_ios(fio, 0);
|
||||
|
@ -2818,9 +2818,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
|
||||
while (zones && sector < nr_sectors) {
|
||||
|
||||
nr_zones = F2FS_REPORT_NR_ZONES;
|
||||
err = blkdev_report_zones(bdev, sector,
|
||||
zones, &nr_zones,
|
||||
GFP_KERNEL);
|
||||
err = blkdev_report_zones(bdev, sector, zones, &nr_zones);
|
||||
if (err)
|
||||
break;
|
||||
if (!nr_zones) {
|
||||
|
@ -270,6 +270,7 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
|
||||
if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
|
||||
wb_put(wb);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__inode_attach_wb);
|
||||
|
||||
/**
|
||||
* locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
|
||||
@ -582,6 +583,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
|
||||
if (unlikely(wb_dying(wbc->wb)))
|
||||
inode_switch_wbs(inode, wbc->wb_id);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
|
||||
|
||||
/**
|
||||
* wbc_detach_inode - disassociate wbc from inode and perform foreign detection
|
||||
@ -701,9 +703,10 @@ void wbc_detach_inode(struct writeback_control *wbc)
|
||||
wb_put(wbc->wb);
|
||||
wbc->wb = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(wbc_detach_inode);
|
||||
|
||||
/**
|
||||
* wbc_account_io - account IO issued during writeback
|
||||
* wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
|
||||
* @wbc: writeback_control of the writeback in progress
|
||||
* @page: page being written out
|
||||
* @bytes: number of bytes being written out
|
||||
@ -712,8 +715,8 @@ void wbc_detach_inode(struct writeback_control *wbc)
|
||||
* controlled by @wbc. Keep the book for foreign inode detection. See
|
||||
* wbc_detach_inode().
|
||||
*/
|
||||
void wbc_account_io(struct writeback_control *wbc, struct page *page,
|
||||
size_t bytes)
|
||||
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
|
||||
size_t bytes)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
int id;
|
||||
@ -724,7 +727,7 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page,
|
||||
* behind a slow cgroup. Ultimately, we want pageout() to kick off
|
||||
* regular writeback instead of writing things out itself.
|
||||
*/
|
||||
if (!wbc->wb)
|
||||
if (!wbc->wb || wbc->no_cgroup_owner)
|
||||
return;
|
||||
|
||||
css = mem_cgroup_css_from_page(page);
|
||||
@ -750,7 +753,7 @@ void wbc_account_io(struct writeback_control *wbc, struct page *page,
|
||||
else
|
||||
wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(wbc_account_io);
|
||||
EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
|
||||
|
||||
/**
|
||||
* inode_congested - test whether an inode is congested
|
||||
|
@ -647,7 +647,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
|
||||
* the confused fail path above (OOM) will be very confused when
|
||||
* it finds all bh marked clean (i.e. it will not write anything)
|
||||
*/
|
||||
wbc_account_io(wbc, page, PAGE_SIZE);
|
||||
wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
|
||||
length = first_unmapped << blkbits;
|
||||
if (bio_add_page(bio, page, length, 0) < length) {
|
||||
bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
|
||||
|
@ -796,7 +796,7 @@ xfs_add_to_ioend(
|
||||
}
|
||||
|
||||
wpc->ioend->io_size += len;
|
||||
wbc_account_io(wbc, page, len);
|
||||
wbc_account_cgroup_owner(wbc, page, len);
|
||||
}
|
||||
|
||||
STATIC void
|
||||
|
@ -48,6 +48,7 @@ extern spinlock_t bdi_lock;
|
||||
extern struct list_head bdi_list;
|
||||
|
||||
extern struct workqueue_struct *bdi_wq;
|
||||
extern struct workqueue_struct *bdi_async_bio_wq;
|
||||
|
||||
static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
|
||||
{
|
||||
|
@ -132,13 +132,17 @@ struct blkcg_gq {
|
||||
|
||||
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
|
||||
|
||||
struct rcu_head rcu_head;
|
||||
spinlock_t async_bio_lock;
|
||||
struct bio_list async_bios;
|
||||
struct work_struct async_bio_work;
|
||||
|
||||
atomic_t use_delay;
|
||||
atomic64_t delay_nsec;
|
||||
atomic64_t delay_start;
|
||||
u64 last_delay;
|
||||
int last_use;
|
||||
|
||||
struct rcu_head rcu_head;
|
||||
};
|
||||
|
||||
typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
|
||||
@ -701,6 +705,15 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
|
||||
struct bio *bio) { return false; }
|
||||
#endif
|
||||
|
||||
bool __blkcg_punt_bio_submit(struct bio *bio);
|
||||
|
||||
static inline bool blkcg_punt_bio_submit(struct bio *bio)
|
||||
{
|
||||
if (bio->bi_opf & REQ_CGROUP_PUNT)
|
||||
return __blkcg_punt_bio_submit(bio);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline void blkcg_bio_issue_init(struct bio *bio)
|
||||
{
|
||||
@ -848,6 +861,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
|
||||
static inline void blkg_get(struct blkcg_gq *blkg) { }
|
||||
static inline void blkg_put(struct blkcg_gq *blkg) { }
|
||||
|
||||
static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
|
||||
static inline void blkcg_bio_issue_init(struct bio *bio) { }
|
||||
static inline bool blkcg_bio_issue_check(struct request_queue *q,
|
||||
struct bio *bio) { return true; }
|
||||
|
@ -311,6 +311,14 @@ enum req_flag_bits {
|
||||
__REQ_RAHEAD, /* read ahead, can fail anytime */
|
||||
__REQ_BACKGROUND, /* background IO */
|
||||
__REQ_NOWAIT, /* Don't wait if request will block */
|
||||
/*
|
||||
* When a shared kthread needs to issue a bio for a cgroup, doing
|
||||
* so synchronously can lead to priority inversions as the kthread
|
||||
* can be trapped waiting for that cgroup. CGROUP_PUNT flag makes
|
||||
* submit_bio() punt the actual issuing to a dedicated per-blkcg
|
||||
* work item to avoid such priority inversions.
|
||||
*/
|
||||
__REQ_CGROUP_PUNT,
|
||||
|
||||
/* command specific flags for REQ_OP_WRITE_ZEROES: */
|
||||
__REQ_NOUNMAP, /* do not free blocks when zeroing */
|
||||
@ -337,6 +345,8 @@ enum req_flag_bits {
|
||||
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
|
||||
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
|
||||
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
|
||||
#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
|
||||
|
||||
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
|
||||
#define REQ_HIPRI (1ULL << __REQ_HIPRI)
|
||||
|
||||
|
@ -344,10 +344,15 @@ struct queue_limits {
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
|
||||
/*
|
||||
* Maximum number of zones to report with a single report zones command.
|
||||
*/
|
||||
#define BLK_ZONED_REPORT_MAX_ZONES 8192U
|
||||
|
||||
extern unsigned int blkdev_nr_zones(struct block_device *bdev);
|
||||
extern int blkdev_report_zones(struct block_device *bdev,
|
||||
sector_t sector, struct blk_zone *zones,
|
||||
unsigned int *nr_zones, gfp_t gfp_mask);
|
||||
unsigned int *nr_zones);
|
||||
extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
|
||||
sector_t nr_sectors, gfp_t gfp_mask);
|
||||
extern int blk_revalidate_disk_zones(struct gendisk *disk);
|
||||
@ -681,7 +686,7 @@ static inline bool blk_queue_is_zoned(struct request_queue *q)
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
|
||||
static inline sector_t blk_queue_zone_sectors(struct request_queue *q)
|
||||
{
|
||||
return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
|
||||
}
|
||||
@ -1418,7 +1423,7 @@ static inline bool bdev_is_zoned(struct block_device *bdev)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
|
||||
static inline sector_t bdev_zone_sectors(struct block_device *bdev)
|
||||
{
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
@ -1673,8 +1678,7 @@ struct block_device_operations {
|
||||
/* this callback is with swap_lock and sometimes page table lock held */
|
||||
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
|
||||
int (*report_zones)(struct gendisk *, sector_t sector,
|
||||
struct blk_zone *zones, unsigned int *nr_zones,
|
||||
gfp_t gfp_mask);
|
||||
struct blk_zone *zones, unsigned int *nr_zones);
|
||||
struct module *owner;
|
||||
const struct pr_ops *pr_ops;
|
||||
};
|
||||
|
@ -699,6 +699,7 @@ void cgroup_path_from_kernfs_id(const union kernfs_node_id *id,
|
||||
struct cgroup_subsys_state;
|
||||
struct cgroup;
|
||||
|
||||
static inline void css_get(struct cgroup_subsys_state *css) {}
|
||||
static inline void css_put(struct cgroup_subsys_state *css) {}
|
||||
static inline int cgroup_attach_task_all(struct task_struct *from,
|
||||
struct task_struct *t) { return 0; }
|
||||
|
@ -95,8 +95,7 @@ typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **
|
||||
|
||||
typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector,
|
||||
struct blk_zone *zones,
|
||||
unsigned int *nr_zones,
|
||||
gfp_t gfp_mask);
|
||||
unsigned int *nr_zones);
|
||||
|
||||
/*
|
||||
* These iteration functions are typically used to check (and combine)
|
||||
|
@ -75,7 +75,7 @@ struct elevator_type
|
||||
size_t icq_size; /* see iocontext.h */
|
||||
size_t icq_align; /* ditto */
|
||||
struct elv_fs_entry *elevator_attrs;
|
||||
char elevator_name[ELV_NAME_MAX];
|
||||
const char *elevator_name;
|
||||
const char *elevator_alias;
|
||||
struct module *elevator_owner;
|
||||
#ifdef CONFIG_BLK_DEBUG_FS
|
||||
@ -160,15 +160,6 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
|
||||
#define ELEVATOR_INSERT_FLUSH 5
|
||||
#define ELEVATOR_INSERT_SORT_MERGE 6
|
||||
|
||||
/*
|
||||
* return values from elevator_may_queue_fn
|
||||
*/
|
||||
enum {
|
||||
ELV_MQUEUE_MAY,
|
||||
ELV_MQUEUE_NO,
|
||||
ELV_MQUEUE_MUST,
|
||||
};
|
||||
|
||||
#define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq))
|
||||
#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
|
||||
|
||||
|
@ -315,7 +315,7 @@ struct nvme_id_ns {
|
||||
__u8 nmic;
|
||||
__u8 rescap;
|
||||
__u8 fpi;
|
||||
__u8 rsvd33;
|
||||
__u8 dlfeat;
|
||||
__le16 nawun;
|
||||
__le16 nawupf;
|
||||
__le16 nacwu;
|
||||
@ -324,11 +324,17 @@ struct nvme_id_ns {
|
||||
__le16 nabspf;
|
||||
__le16 noiob;
|
||||
__u8 nvmcap[16];
|
||||
__u8 rsvd64[28];
|
||||
__le16 npwg;
|
||||
__le16 npwa;
|
||||
__le16 npdg;
|
||||
__le16 npda;
|
||||
__le16 nows;
|
||||
__u8 rsvd74[18];
|
||||
__le32 anagrpid;
|
||||
__u8 rsvd96[3];
|
||||
__u8 nsattr;
|
||||
__u8 rsvd100[4];
|
||||
__le16 nvmsetid;
|
||||
__le16 endgid;
|
||||
__u8 nguid[16];
|
||||
__u8 eui64[8];
|
||||
struct nvme_lbaf lbaf[16];
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/flex_proportions.h>
|
||||
#include <linux/backing-dev-defs.h>
|
||||
#include <linux/blk_types.h>
|
||||
#include <linux/blk-cgroup.h>
|
||||
|
||||
struct bio;
|
||||
|
||||
@ -68,6 +69,17 @@ struct writeback_control {
|
||||
unsigned for_reclaim:1; /* Invoked from the page allocator */
|
||||
unsigned range_cyclic:1; /* range_start is cyclic */
|
||||
unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
|
||||
|
||||
/*
|
||||
* When writeback IOs are bounced through async layers, only the
|
||||
* initial synchronous phase should be accounted towards inode
|
||||
* cgroup ownership arbitration to avoid confusion. Later stages
|
||||
* can set the following flag to disable the accounting.
|
||||
*/
|
||||
unsigned no_cgroup_owner:1;
|
||||
|
||||
unsigned punt_to_cgroup:1; /* cgrp punting, see __REQ_CGROUP_PUNT */
|
||||
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
struct bdi_writeback *wb; /* wb this writeback is issued under */
|
||||
struct inode *inode; /* inode being written out */
|
||||
@ -84,12 +96,27 @@ struct writeback_control {
|
||||
|
||||
static inline int wbc_to_write_flags(struct writeback_control *wbc)
|
||||
{
|
||||
if (wbc->sync_mode == WB_SYNC_ALL)
|
||||
return REQ_SYNC;
|
||||
else if (wbc->for_kupdate || wbc->for_background)
|
||||
return REQ_BACKGROUND;
|
||||
int flags = 0;
|
||||
|
||||
return 0;
|
||||
if (wbc->punt_to_cgroup)
|
||||
flags = REQ_CGROUP_PUNT;
|
||||
|
||||
if (wbc->sync_mode == WB_SYNC_ALL)
|
||||
flags |= REQ_SYNC;
|
||||
else if (wbc->for_kupdate || wbc->for_background)
|
||||
flags |= REQ_BACKGROUND;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
static inline struct cgroup_subsys_state *
|
||||
wbc_blkcg_css(struct writeback_control *wbc)
|
||||
{
|
||||
#ifdef CONFIG_CGROUP_WRITEBACK
|
||||
if (wbc->wb)
|
||||
return wbc->wb->blkcg_css;
|
||||
#endif
|
||||
return blkcg_root_css;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -188,8 +215,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
|
||||
struct inode *inode)
|
||||
__releases(&inode->i_lock);
|
||||
void wbc_detach_inode(struct writeback_control *wbc);
|
||||
void wbc_account_io(struct writeback_control *wbc, struct page *page,
|
||||
size_t bytes);
|
||||
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
|
||||
size_t bytes);
|
||||
void cgroup_writeback_umount(void);
|
||||
|
||||
/**
|
||||
@ -291,8 +318,8 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void wbc_account_io(struct writeback_control *wbc,
|
||||
struct page *page, size_t bytes)
|
||||
static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
|
||||
struct page *page, size_t bytes)
|
||||
{
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user