mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-04 04:04:19 +00:00
for-5.19/drivers-2022-05-22
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmKKrTcQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgph/REAC0/7odRfJeTJ1PkJhSKFc7dhyS7rK4du2s 3+z+H6Yeua2yVIJb0mYYGEJcOUUQ9nD2T9424n3NzDOw88U4y8Vg2YEH+UiJBuj4 AJoxPNkQdxL7WzmwHmRNLCcOOFhISLqWiCJSr45d+LP1f6aO24Q9lewYWxtNA4TW mqb7Ne7e3Z77m9rmsCsZ26bzQHg1EEQ6qgjZM9tqMhOeTqYhmrqfrD9KtG8TIkpK N8277E5QcequHf7v6VpKqEOzf3d2kx55JaZdu+oxLPVMED3wJJFwcYF1/xmM7Fgx tp7xCjqqUHXwKvJNCFJpnvw+cXu0Ct7cWOIG4ROCvaTD4vBI1KzZLc0gO7pKFW0Y hNIlMXr4n8PmonS81tMV4TqmRWxedX/jxuaeJCVNr89PqYU4luPpigJZqv7rlGry KZUlktQot22M/7FC2MS6KhgbQKLPrRGTAEyY/JNwBHckCZiduWQFlmKLQ926xQIJ 6vdjSzHK5MrT/d+yow3bGFxAJWloGJ+L+RsH0b+WikF81+6ic9P3AoStgbVilfKD 6sbjcju8SShDlQ+W/Ocm0rHC+i/RDKT3QqItXgfhA/1FfMPODQGc/xcZg+AdTswn VSnUIkvk9/mTO0StilVfNJDfG1QkSpJ5Ilvs/DnIahZj6IG4QbJvtnVNbmQX6ptz AUB4DdGwXg== =geQL -----END PGP SIGNATURE----- Merge tag 'for-5.19/drivers-2022-05-22' of git://git.kernel.dk/linux-block Pull block driver updates from Jens Axboe: "Here are the driver updates queued up for 5.19. 
This contains:

 - NVMe pull requests via Christoph:
     - tighten the PCI presence check (Stefan Roese)
     - fix a potential NULL pointer dereference in an error path (Kyle Miller Smith)
     - fix interpretation of the DMRSL field (Tom Yan)
     - relax the data transfer alignment (Keith Busch)
     - verbose error logging improvements (Max Gurtovoy, Chaitanya Kulkarni)
     - misc cleanups (Chaitanya Kulkarni, Christoph)
     - set non-mdts limits in nvme_scan_work (Chaitanya Kulkarni)
     - add support for TP4084 - Time-to-Ready Enhancements (Christoph)

 - MD pull request via Song:
     - Improve annotation in raid5 code, by Logan Gunthorpe
     - Support MD_BROKEN flag in raid-1/5/10, by Mariusz Tkaczyk
     - Other small fixes/cleanups

 - null_blk series making the configfs side much saner (Damien)

 - Various minor drbd cleanups and fixes (Haowen, Uladzislau, Jiapeng, Arnd, Cai)

 - Avoid using the system workqueue (and hence flushing it) in rnbd (Jack)

 - Avoid using the system workqueue (and hence flushing it) in aoe (Tetsuo)

 - Series fixing discard_alignment issues in drivers (Christoph)

 - Small series fixing drivers poking at disk->part0 for openers information (Christoph)

 - Series fixing deadlocks in loop (Christoph, Tetsuo)

 - Remove loop.h and add SPDX headers (Christoph)

 - Various fixes and cleanups (Julia, Xie, Yu)"

* tag 'for-5.19/drivers-2022-05-22' of git://git.kernel.dk/linux-block: (72 commits)
  mtip32xx: fix typo in comment
  nvme: set non-mdts limits in nvme_scan_work
  nvme: add support for TP4084 - Time-to-Ready Enhancements
  nvme: split the enum used for various register constants
  nbd: Fix hung on disconnect request if socket is closed before
  nvme-fabrics: add a request timeout helper
  nvme-pci: harden drive presence detect in nvme_dev_disable()
  nvme-pci: fix a NULL pointer dereference in nvme_alloc_admin_tags
  nvme: mark internal passthru request RQF_QUIET
  nvme: remove unneeded include from constants file
  nvme: add missing status values to verbose logging
  nvme: set dma alignment to dword
  nvme: fix interpretation of DMRSL
  loop: remove most the top-of-file boilerplate comment from the UAPI header
  loop: remove most the top-of-file boilerplate comment
  loop: add a SPDX header
  loop: remove loop.h
  block: null_blk: Improve device creation with configfs
  block: null_blk: Cleanup messages
  block: null_blk: Cleanup device creation and deletion
  ...
This commit is contained in:
commit
5dc921868c
@ -799,7 +799,6 @@ static int ubd_open_dev(struct ubd *ubd_dev)
|
||||
}
|
||||
if (ubd_dev->no_trim == 0) {
|
||||
ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
|
||||
ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
|
||||
blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
|
||||
}
|
||||
|
16
block/bdev.c
16
block/bdev.c
@ -673,17 +673,17 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
|
||||
}
|
||||
}
|
||||
|
||||
if (!bdev->bd_openers)
|
||||
if (!atomic_read(&bdev->bd_openers))
|
||||
set_init_blocksize(bdev);
|
||||
if (test_bit(GD_NEED_PART_SCAN, &disk->state))
|
||||
bdev_disk_changed(disk, false);
|
||||
bdev->bd_openers++;
|
||||
atomic_inc(&bdev->bd_openers);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
|
||||
{
|
||||
if (!--bdev->bd_openers)
|
||||
if (atomic_dec_and_test(&bdev->bd_openers))
|
||||
blkdev_flush_mapping(bdev);
|
||||
if (bdev->bd_disk->fops->release)
|
||||
bdev->bd_disk->fops->release(bdev->bd_disk, mode);
|
||||
@ -694,7 +694,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
|
||||
struct gendisk *disk = part->bd_disk;
|
||||
int ret;
|
||||
|
||||
if (part->bd_openers)
|
||||
if (atomic_read(&part->bd_openers))
|
||||
goto done;
|
||||
|
||||
ret = blkdev_get_whole(bdev_whole(part), mode);
|
||||
@ -708,7 +708,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
|
||||
disk->open_partitions++;
|
||||
set_init_blocksize(part);
|
||||
done:
|
||||
part->bd_openers++;
|
||||
atomic_inc(&part->bd_openers);
|
||||
return 0;
|
||||
|
||||
out_blkdev_put:
|
||||
@ -720,7 +720,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
|
||||
{
|
||||
struct block_device *whole = bdev_whole(part);
|
||||
|
||||
if (--part->bd_openers)
|
||||
if (!atomic_dec_and_test(&part->bd_openers))
|
||||
return;
|
||||
blkdev_flush_mapping(part);
|
||||
whole->bd_disk->open_partitions--;
|
||||
@ -899,7 +899,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
|
||||
* of the world and we want to avoid long (could be several minute)
|
||||
* syncs while holding the mutex.
|
||||
*/
|
||||
if (bdev->bd_openers == 1)
|
||||
if (atomic_read(&bdev->bd_openers) == 1)
|
||||
sync_blockdev(bdev);
|
||||
|
||||
mutex_lock(&disk->open_mutex);
|
||||
@ -1044,7 +1044,7 @@ void sync_bdevs(bool wait)
|
||||
bdev = I_BDEV(inode);
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
if (!bdev->bd_openers) {
|
||||
if (!atomic_read(&bdev->bd_openers)) {
|
||||
; /* skip */
|
||||
} else if (wait) {
|
||||
/*
|
||||
|
@ -478,7 +478,7 @@ int bdev_del_partition(struct gendisk *disk, int partno)
|
||||
goto out_unlock;
|
||||
|
||||
ret = -EBUSY;
|
||||
if (part->bd_openers)
|
||||
if (atomic_read(&part->bd_openers))
|
||||
goto out_unlock;
|
||||
|
||||
delete_partition(part);
|
||||
|
@ -244,3 +244,5 @@ void aoenet_exit(void);
|
||||
void aoenet_xmit(struct sk_buff_head *);
|
||||
int is_aoe_netif(struct net_device *ifp);
|
||||
int set_aoe_iflist(const char __user *str, size_t size);
|
||||
|
||||
extern struct workqueue_struct *aoe_wq;
|
||||
|
@ -435,7 +435,7 @@ aoeblk_gdalloc(void *vp)
|
||||
err:
|
||||
spin_lock_irqsave(&d->lock, flags);
|
||||
d->flags &= ~DEVFL_GD_NOW;
|
||||
schedule_work(&d->work);
|
||||
queue_work(aoe_wq, &d->work);
|
||||
spin_unlock_irqrestore(&d->lock, flags);
|
||||
}
|
||||
|
||||
|
@ -968,7 +968,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
|
||||
d->flags |= DEVFL_NEWSIZE;
|
||||
else
|
||||
d->flags |= DEVFL_GDALLOC;
|
||||
schedule_work(&d->work);
|
||||
queue_work(aoe_wq, &d->work);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -321,7 +321,7 @@ flush(const char __user *str, size_t cnt, int exiting)
|
||||
specified = 1;
|
||||
}
|
||||
|
||||
flush_scheduled_work();
|
||||
flush_workqueue(aoe_wq);
|
||||
/* pass one: do aoedev_downdev, which might sleep */
|
||||
restart1:
|
||||
spin_lock_irqsave(&devlist_lock, flags);
|
||||
@ -520,7 +520,7 @@ freetgt(struct aoedev *d, struct aoetgt *t)
|
||||
void
|
||||
aoedev_exit(void)
|
||||
{
|
||||
flush_scheduled_work();
|
||||
flush_workqueue(aoe_wq);
|
||||
flush(NULL, 0, EXITING);
|
||||
}
|
||||
|
||||
|
@ -16,6 +16,7 @@ MODULE_DESCRIPTION("AoE block/char driver for 2.6.2 and newer 2.6 kernels");
|
||||
MODULE_VERSION(VERSION);
|
||||
|
||||
static struct timer_list timer;
|
||||
struct workqueue_struct *aoe_wq;
|
||||
|
||||
static void discover_timer(struct timer_list *t)
|
||||
{
|
||||
@ -35,6 +36,7 @@ aoe_exit(void)
|
||||
aoechr_exit();
|
||||
aoedev_exit();
|
||||
aoeblk_exit(); /* free cache after de-allocating bufs */
|
||||
destroy_workqueue(aoe_wq);
|
||||
}
|
||||
|
||||
static int __init
|
||||
@ -42,9 +44,13 @@ aoe_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
aoe_wq = alloc_workqueue("aoe_wq", 0, 0);
|
||||
if (!aoe_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = aoedev_init();
|
||||
if (ret)
|
||||
return ret;
|
||||
goto dev_fail;
|
||||
ret = aoechr_init();
|
||||
if (ret)
|
||||
goto chr_fail;
|
||||
@ -77,6 +83,8 @@ aoe_init(void)
|
||||
aoechr_exit();
|
||||
chr_fail:
|
||||
aoedev_exit();
|
||||
dev_fail:
|
||||
destroy_workqueue(aoe_wq);
|
||||
|
||||
printk(KERN_INFO "aoe: initialisation failure.\n");
|
||||
return ret;
|
||||
|
@ -683,7 +683,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
|
||||
}
|
||||
}
|
||||
|
||||
want = ALIGN(words*sizeof(long), PAGE_SIZE) >> PAGE_SHIFT;
|
||||
want = PFN_UP(words*sizeof(long));
|
||||
have = b->bm_number_of_pages;
|
||||
if (want == have) {
|
||||
D_ASSERT(device, b->bm_pages != NULL);
|
||||
|
@ -3581,9 +3581,8 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
* when we want to support more than
|
||||
* one PRO_VERSION */
|
||||
static const char *cmdnames[] = {
|
||||
|
||||
[P_DATA] = "Data",
|
||||
[P_WSAME] = "WriteSame",
|
||||
[P_TRIM] = "Trim",
|
||||
[P_DATA_REPLY] = "DataReply",
|
||||
[P_RS_DATA_REPLY] = "RSDataReply",
|
||||
[P_BARRIER] = "Barrier",
|
||||
@ -3594,7 +3593,6 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
[P_DATA_REQUEST] = "DataRequest",
|
||||
[P_RS_DATA_REQUEST] = "RSDataRequest",
|
||||
[P_SYNC_PARAM] = "SyncParam",
|
||||
[P_SYNC_PARAM89] = "SyncParam89",
|
||||
[P_PROTOCOL] = "ReportProtocol",
|
||||
[P_UUIDS] = "ReportUUIDs",
|
||||
[P_SIZES] = "ReportSizes",
|
||||
@ -3602,6 +3600,7 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
[P_SYNC_UUID] = "ReportSyncUUID",
|
||||
[P_AUTH_CHALLENGE] = "AuthChallenge",
|
||||
[P_AUTH_RESPONSE] = "AuthResponse",
|
||||
[P_STATE_CHG_REQ] = "StateChgRequest",
|
||||
[P_PING] = "Ping",
|
||||
[P_PING_ACK] = "PingAck",
|
||||
[P_RECV_ACK] = "RecvAck",
|
||||
@ -3612,23 +3611,25 @@ const char *cmdname(enum drbd_packet cmd)
|
||||
[P_NEG_DREPLY] = "NegDReply",
|
||||
[P_NEG_RS_DREPLY] = "NegRSDReply",
|
||||
[P_BARRIER_ACK] = "BarrierAck",
|
||||
[P_STATE_CHG_REQ] = "StateChgRequest",
|
||||
[P_STATE_CHG_REPLY] = "StateChgReply",
|
||||
[P_OV_REQUEST] = "OVRequest",
|
||||
[P_OV_REPLY] = "OVReply",
|
||||
[P_OV_RESULT] = "OVResult",
|
||||
[P_CSUM_RS_REQUEST] = "CsumRSRequest",
|
||||
[P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
|
||||
[P_SYNC_PARAM89] = "SyncParam89",
|
||||
[P_COMPRESSED_BITMAP] = "CBitmap",
|
||||
[P_DELAY_PROBE] = "DelayProbe",
|
||||
[P_OUT_OF_SYNC] = "OutOfSync",
|
||||
[P_RETRY_WRITE] = "RetryWrite",
|
||||
[P_RS_CANCEL] = "RSCancel",
|
||||
[P_CONN_ST_CHG_REQ] = "conn_st_chg_req",
|
||||
[P_CONN_ST_CHG_REPLY] = "conn_st_chg_reply",
|
||||
[P_PROTOCOL_UPDATE] = "protocol_update",
|
||||
[P_TRIM] = "Trim",
|
||||
[P_RS_THIN_REQ] = "rs_thin_req",
|
||||
[P_RS_DEALLOCATED] = "rs_deallocated",
|
||||
[P_WSAME] = "WriteSame",
|
||||
[P_ZEROES] = "Zeroes",
|
||||
|
||||
/* enum drbd_packet, but not commands - obsoleted flags:
|
||||
* P_MAY_IGNORE
|
||||
|
@ -770,6 +770,7 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
|
||||
struct set_role_parms parms;
|
||||
int err;
|
||||
enum drbd_ret_code retcode;
|
||||
enum drbd_state_rv rv;
|
||||
|
||||
retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
|
||||
if (!adm_ctx.reply_skb)
|
||||
@ -790,14 +791,14 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
|
||||
mutex_lock(&adm_ctx.resource->adm_mutex);
|
||||
|
||||
if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
|
||||
retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
|
||||
R_PRIMARY, parms.assume_uptodate);
|
||||
rv = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
|
||||
else
|
||||
retcode = (enum drbd_ret_code)drbd_set_role(adm_ctx.device,
|
||||
R_SECONDARY, 0);
|
||||
rv = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
|
||||
|
||||
mutex_unlock(&adm_ctx.resource->adm_mutex);
|
||||
genl_lock();
|
||||
drbd_adm_finish(&adm_ctx, info, rv);
|
||||
return 0;
|
||||
out:
|
||||
drbd_adm_finish(&adm_ctx, info, retcode);
|
||||
return 0;
|
||||
@ -1601,8 +1602,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
|
||||
drbd_send_sync_param(peer_device);
|
||||
}
|
||||
|
||||
synchronize_rcu();
|
||||
kfree(old_disk_conf);
|
||||
kvfree_rcu(old_disk_conf);
|
||||
kfree(old_plan);
|
||||
mod_timer(&device->request_timer, jiffies + HZ);
|
||||
goto success;
|
||||
@ -2433,8 +2433,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
|
||||
|
||||
mutex_unlock(&connection->resource->conf_update);
|
||||
mutex_unlock(&connection->data.mutex);
|
||||
synchronize_rcu();
|
||||
kfree(old_net_conf);
|
||||
kvfree_rcu(old_net_conf);
|
||||
|
||||
if (connection->cstate >= C_WF_REPORT_PARAMS) {
|
||||
struct drbd_peer_device *peer_device;
|
||||
@ -2492,6 +2491,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
|
||||
struct drbd_resource *resource;
|
||||
struct drbd_connection *connection;
|
||||
enum drbd_ret_code retcode;
|
||||
enum drbd_state_rv rv;
|
||||
int i;
|
||||
int err;
|
||||
|
||||
@ -2611,12 +2611,11 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
retcode = (enum drbd_ret_code)conn_request_state(connection,
|
||||
NS(conn, C_UNCONNECTED), CS_VERBOSE);
|
||||
rv = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
|
||||
|
||||
conn_reconfig_done(connection);
|
||||
mutex_unlock(&adm_ctx.resource->adm_mutex);
|
||||
drbd_adm_finish(&adm_ctx, info, retcode);
|
||||
drbd_adm_finish(&adm_ctx, info, rv);
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
@ -2724,11 +2723,12 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
|
||||
|
||||
mutex_lock(&adm_ctx.resource->adm_mutex);
|
||||
rv = conn_try_disconnect(connection, parms.force_disconnect);
|
||||
if (rv < SS_SUCCESS)
|
||||
retcode = (enum drbd_ret_code)rv;
|
||||
else
|
||||
retcode = NO_ERROR;
|
||||
mutex_unlock(&adm_ctx.resource->adm_mutex);
|
||||
if (rv < SS_SUCCESS) {
|
||||
drbd_adm_finish(&adm_ctx, info, rv);
|
||||
return 0;
|
||||
}
|
||||
retcode = NO_ERROR;
|
||||
fail:
|
||||
drbd_adm_finish(&adm_ctx, info, retcode);
|
||||
return 0;
|
||||
@ -2847,8 +2847,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
|
||||
new_disk_conf->disk_size = (sector_t)rs.resize_size;
|
||||
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
|
||||
mutex_unlock(&device->resource->conf_update);
|
||||
synchronize_rcu();
|
||||
kfree(old_disk_conf);
|
||||
kvfree_rcu(old_disk_conf);
|
||||
new_disk_conf = NULL;
|
||||
}
|
||||
|
||||
|
@ -364,7 +364,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
|
||||
struct drbd_device *device = peer_device->device;
|
||||
struct drbd_peer_request *peer_req;
|
||||
struct page *page = NULL;
|
||||
unsigned nr_pages = (payload_size + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
unsigned int nr_pages = PFN_UP(payload_size);
|
||||
|
||||
if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
|
||||
return NULL;
|
||||
@ -1628,9 +1628,9 @@ int drbd_submit_peer_request(struct drbd_device *device,
|
||||
struct bio *bio;
|
||||
struct page *page = peer_req->pages;
|
||||
sector_t sector = peer_req->i.sector;
|
||||
unsigned data_size = peer_req->i.size;
|
||||
unsigned n_bios = 0;
|
||||
unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
unsigned int data_size = peer_req->i.size;
|
||||
unsigned int n_bios = 0;
|
||||
unsigned int nr_pages = PFN_UP(data_size);
|
||||
|
||||
/* TRIM/DISCARD: for now, always use the helper function
|
||||
* blkdev_issue_zeroout(..., discard=true).
|
||||
@ -3750,8 +3750,7 @@ static int receive_protocol(struct drbd_connection *connection, struct packet_in
|
||||
drbd_info(connection, "peer data-integrity-alg: %s\n",
|
||||
integrity_alg[0] ? integrity_alg : "(none)");
|
||||
|
||||
synchronize_rcu();
|
||||
kfree(old_net_conf);
|
||||
kvfree_rcu(old_net_conf);
|
||||
return 0;
|
||||
|
||||
disconnect_rcu_unlock:
|
||||
@ -3902,7 +3901,6 @@ static int receive_SyncParam(struct drbd_connection *connection, struct packet_i
|
||||
drbd_err(device, "verify-alg of wrong size, "
|
||||
"peer wants %u, accepting only up to %u byte\n",
|
||||
data_size, SHARED_SECRET_MAX);
|
||||
err = -EIO;
|
||||
goto reconnect;
|
||||
}
|
||||
|
||||
@ -4120,8 +4118,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
|
||||
|
||||
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
|
||||
mutex_unlock(&connection->resource->conf_update);
|
||||
synchronize_rcu();
|
||||
kfree(old_disk_conf);
|
||||
kvfree_rcu(old_disk_conf);
|
||||
|
||||
drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
|
||||
(unsigned long)p_usize, (unsigned long)my_usize);
|
||||
|
@ -922,7 +922,7 @@ static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
|
||||
|
||||
switch (rbm) {
|
||||
case RB_CONGESTED_REMOTE:
|
||||
return 0;
|
||||
return false;
|
||||
case RB_LEAST_PENDING:
|
||||
return atomic_read(&device->local_cnt) >
|
||||
atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
|
||||
|
@ -2071,8 +2071,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
|
||||
conn_free_crypto(connection);
|
||||
mutex_unlock(&connection->resource->conf_update);
|
||||
|
||||
synchronize_rcu();
|
||||
kfree(old_conf);
|
||||
kvfree_rcu(old_conf);
|
||||
}
|
||||
|
||||
if (ns_max.susp_fen) {
|
||||
|
@ -1030,7 +1030,7 @@ static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_
|
||||
{
|
||||
if (drbd_peer_req_has_active_page(peer_req)) {
|
||||
/* This might happen if sendpage() has not finished */
|
||||
int i = (peer_req->i.size + PAGE_SIZE -1) >> PAGE_SHIFT;
|
||||
int i = PFN_UP(peer_req->i.size);
|
||||
atomic_add(i, &device->pp_in_use_by_net);
|
||||
atomic_sub(i, &device->pp_in_use);
|
||||
spin_lock_irq(&device->resource->req_lock);
|
||||
|
@ -1,54 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* linux/drivers/block/loop.c
|
||||
*
|
||||
* Written by Theodore Ts'o, 3/29/93
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
||||
* permitted under the GNU General Public License.
|
||||
*
|
||||
* DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
|
||||
* more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
|
||||
*
|
||||
* Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
|
||||
* Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
|
||||
*
|
||||
* Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
|
||||
*
|
||||
* Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
|
||||
*
|
||||
* Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
|
||||
*
|
||||
* Loadable modules and other fixes by AK, 1998
|
||||
*
|
||||
* Make real block number available to downstream transfer functions, enables
|
||||
* CBC (and relatives) mode encryption requiring unique IVs per data block.
|
||||
* Reed H. Petty, rhp@draper.net
|
||||
*
|
||||
* Maximum number of loop devices now dynamic via max_loop module parameter.
|
||||
* Russell Kroll <rkroll@exploits.org> 19990701
|
||||
*
|
||||
* Maximum number of loop devices when compiled-in now selectable by passing
|
||||
* max_loop=<1-255> to the kernel on boot.
|
||||
* Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
|
||||
*
|
||||
* Completely rewrite request handling to be make_request_fn style and
|
||||
* non blocking, pushing work to a helper thread. Lots of fixes from
|
||||
* Al Viro too.
|
||||
* Jens Axboe <axboe@suse.de>, Nov 2000
|
||||
*
|
||||
* Support up to 256 loop devices
|
||||
* Heinz Mauelshagen <mge@sistina.com>, Feb 2002
|
||||
*
|
||||
* Support for falling back on the write file operation when the address space
|
||||
* operations write_begin is not available on the backing filesystem.
|
||||
* Anton Altaparmakov, 16 Feb 2005
|
||||
*
|
||||
* Still To Fix:
|
||||
* - Advisory locking is ignored here.
|
||||
* - Should use an own CAP_* category instead of CAP_SYS_ADMIN
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/sched.h>
|
||||
@ -59,7 +12,6 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/major.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blkpg.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/swap.h>
|
||||
@ -80,10 +32,62 @@
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/statfs.h>
|
||||
|
||||
#include "loop.h"
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <uapi/linux/loop.h>
|
||||
|
||||
/* Possible states of device */
|
||||
/* Values held in struct loop_device::lo_state. */
enum {
|
||||
Lo_unbound,
|
||||
/* loop_clr_fd() returns -ENXIO unless the device is in this state. */
Lo_bound,
|
||||
/* Set by loop_clr_fd() just before it calls __loop_clr_fd(). */
Lo_rundown,
|
||||
Lo_deleting,
|
||||
};
|
||||
|
||||
struct loop_func_table;
|
||||
|
||||
/*
 * Per-device state of the loop block driver.
 *
 * Field notes below describe only the uses visible in this file's
 * loop_configure(), loop_clr_fd(), __loop_clr_fd() and the idle-worker
 * helpers; other users may exist.
 */
struct loop_device {
|
||||
/* Device index; formatted into the workqueue name "loop%d". */
int lo_number;
|
||||
loff_t lo_offset;
|
||||
loff_t lo_sizelimit;
|
||||
/* LO_FLAGS_* bits (e.g. LO_FLAGS_READ_ONLY, LO_FLAGS_AUTOCLEAR, LO_FLAGS_DIRECT_IO). */
int lo_flags;
|
||||
char lo_file_name[LO_NAME_SIZE];
|
||||
|
||||
/* Backing file; assigned in loop_configure(), read under lo_lock in __loop_clr_fd(). */
struct file * lo_backing_file;
|
||||
/* Block device handed to loop_configure(). */
struct block_device *lo_device;
|
||||
|
||||
/* GFP mask that __loop_clr_fd() restores on the backing file's mapping. */
gfp_t old_gfp_mask;
|
||||
|
||||
spinlock_t lo_lock;
|
||||
/* One of the Lo_* state values. */
int lo_state;
|
||||
/* Held while idle_worker_list and worker_tree are modified. */
spinlock_t lo_work_lock;
|
||||
/* Allocated with alloc_workqueue() in loop_configure(). */
struct workqueue_struct *workqueue;
|
||||
struct work_struct rootcg_work;
|
||||
struct list_head rootcg_cmd_list;
|
||||
/* Workers eligible for reaping by loop_free_idle_workers(). */
struct list_head idle_worker_list;
|
||||
/* rb-tree of workers; entries are erased when a worker is freed. */
struct rb_root worker_tree;
|
||||
/* Idle-worker timer, armed by loop_set_timer(). */
struct timer_list timer;
|
||||
/* Mirrors LO_FLAGS_DIRECT_IO as of loop_configure(). */
bool use_dio;
|
||||
bool sysfs_inited;
|
||||
|
||||
struct request_queue *lo_queue;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct gendisk *lo_disk;
|
||||
struct mutex lo_mutex;
|
||||
bool idr_visible;
|
||||
};
|
||||
|
||||
/*
 * Per-command state of the loop driver; this is the type that
 * loop_queue_work() takes alongside the owning loop_device.
 */
struct loop_cmd {
|
||||
/* NOTE(review): presumably links the command onto a worker's or the root-cgroup command list - confirm. */
struct list_head list_entry;
|
||||
bool use_aio; /* use AIO interface to handle I/O */
|
||||
atomic_t ref; /* only for aio */
|
||||
long ret;
|
||||
struct kiocb iocb;
|
||||
struct bio_vec *bvec;
|
||||
/* NOTE(review): cgroup contexts, presumably those the command is charged to - confirm against the users. */
struct cgroup_subsys_state *blkcg_css;
|
||||
struct cgroup_subsys_state *memcg_css;
|
||||
};
|
||||
|
||||
#define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ)
|
||||
#define LOOP_DEFAULT_HW_Q_DEPTH (128)
|
||||
@ -569,6 +573,10 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
|
||||
/* suppress uevents while reconfiguring the device */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
|
||||
|
||||
is_loop = is_loop_device(file);
|
||||
error = loop_global_lock_killable(lo, is_loop);
|
||||
if (error)
|
||||
@ -623,13 +631,18 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
fput(old_file);
|
||||
if (partscan)
|
||||
loop_reread_partitions(lo);
|
||||
return 0;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
/* enable and uncork uevent now that we are done */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
return error;
|
||||
|
||||
out_err:
|
||||
loop_global_unlock(lo, is_loop);
|
||||
out_putf:
|
||||
fput(file);
|
||||
return error;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* loop sysfs attributes */
|
||||
@ -789,7 +802,6 @@ static void loop_config_discard(struct loop_device *lo)
|
||||
blk_queue_max_discard_sectors(q, 0);
|
||||
blk_queue_max_write_zeroes_sectors(q, 0);
|
||||
}
|
||||
q->limits.discard_alignment = 0;
|
||||
}
|
||||
|
||||
struct loop_worker {
|
||||
@ -803,8 +815,6 @@ struct loop_worker {
|
||||
};
|
||||
|
||||
static void loop_workfn(struct work_struct *work);
|
||||
static void loop_rootcg_workfn(struct work_struct *work);
|
||||
static void loop_free_idle_workers(struct timer_list *timer);
|
||||
|
||||
#ifdef CONFIG_BLK_CGROUP
|
||||
static inline int queue_on_root_worker(struct cgroup_subsys_state *css)
|
||||
@ -888,6 +898,39 @@ static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd)
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
}
|
||||
|
||||
/*
 * Arm the idle-worker timer of @lo to expire LOOP_IDLE_WORKER_TIMEOUT
 * jiffies from now, using timer_reduce().
 */
static void loop_set_timer(struct loop_device *lo)
|
||||
{
|
||||
timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
|
||||
}
|
||||
|
||||
/*
 * Free workers that sit on @lo's idle_worker_list.
 *
 * @lo:         loop device whose idle workers are reaped
 * @delete_all: when true, free every idle worker; when false, stop at the
 *              first worker whose last_ran_at + LOOP_IDLE_WORKER_TIMEOUT
 *              is still in the future
 *
 * The whole walk runs under lo->lo_work_lock with interrupts disabled
 * (spin_lock_irq). If any idle worker remains afterwards, the timer is
 * re-armed through loop_set_timer().
 */
static void loop_free_idle_workers(struct loop_device *lo, bool delete_all)
|
||||
{
|
||||
/* Note the argument order below: "worker" is the cursor, "pos" the lookahead. */
struct loop_worker *pos, *worker;
|
||||
|
||||
spin_lock_irq(&lo->lo_work_lock);
|
||||
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
|
||||
idle_list) {
|
||||
/*
 * Stops instead of skipping: assumes later entries went idle no
 * earlier than this one - TODO confirm against where workers are
 * added to idle_worker_list.
 */
if (!delete_all &&
|
||||
time_is_after_jiffies(worker->last_ran_at +
|
||||
LOOP_IDLE_WORKER_TIMEOUT))
|
||||
break;
|
||||
/* Unlink from both the idle list and the worker tree, then release. */
list_del(&worker->idle_list);
|
||||
rb_erase(&worker->rb_node, &lo->worker_tree);
|
||||
css_put(worker->blkcg_css);
|
||||
kfree(worker);
|
||||
}
|
||||
/* Workers left over: schedule another pass. */
if (!list_empty(&lo->idle_worker_list))
|
||||
loop_set_timer(lo);
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
}
|
||||
|
||||
/*
 * timer_list-style callback wrapper around loop_free_idle_workers().
 *
 * @timer: the timer embedded in a struct loop_device (its "timer" member)
 *
 * Recovers the owning loop_device with container_of() and reaps only the
 * workers that have timed out (delete_all == false).
 * NOTE(review): presumably registered as the callback of lo->timer - the
 * registration is not part of this block.
 */
static void loop_free_idle_workers_timer(struct timer_list *timer)
|
||||
{
|
||||
struct loop_device *lo = container_of(timer, struct loop_device, timer);
|
||||
|
||||
return loop_free_idle_workers(lo, false);
|
||||
}
|
||||
|
||||
static void loop_update_rotational(struct loop_device *lo)
|
||||
{
|
||||
struct file *file = lo->lo_backing_file;
|
||||
@ -962,6 +1005,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
/* This is safe, since we have a reference from open(). */
|
||||
__module_get(THIS_MODULE);
|
||||
|
||||
/* suppress uevents while reconfiguring the device */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
|
||||
|
||||
/*
|
||||
* If we don't hold exclusive handle for the device, upgrade to it
|
||||
* here to avoid changing device under exclusive owner.
|
||||
@ -1006,24 +1052,19 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
!file->f_op->write_iter)
|
||||
lo->lo_flags |= LO_FLAGS_READ_ONLY;
|
||||
|
||||
lo->workqueue = alloc_workqueue("loop%d",
|
||||
WQ_UNBOUND | WQ_FREEZABLE,
|
||||
0,
|
||||
lo->lo_number);
|
||||
if (!lo->workqueue) {
|
||||
error = -ENOMEM;
|
||||
goto out_unlock;
|
||||
lo->workqueue = alloc_workqueue("loop%d",
|
||||
WQ_UNBOUND | WQ_FREEZABLE,
|
||||
0, lo->lo_number);
|
||||
if (!lo->workqueue) {
|
||||
error = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
|
||||
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
|
||||
|
||||
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
|
||||
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
|
||||
INIT_LIST_HEAD(&lo->idle_worker_list);
|
||||
lo->worker_tree = RB_ROOT;
|
||||
timer_setup(&lo->timer, loop_free_idle_workers,
|
||||
TIMER_DEFERRABLE);
|
||||
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
|
||||
lo->lo_device = bdev;
|
||||
lo->lo_backing_file = file;
|
||||
@ -1068,7 +1109,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
loop_reread_partitions(lo);
|
||||
if (!(mode & FMODE_EXCL))
|
||||
bd_abort_claiming(bdev, loop_configure);
|
||||
return 0;
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
/* enable and uncork uevent now that we are done */
|
||||
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0);
|
||||
return error;
|
||||
|
||||
out_unlock:
|
||||
loop_global_unlock(lo, is_loop);
|
||||
@ -1079,53 +1125,24 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
||||
fput(file);
|
||||
/* This is safe: open() is still holding a reference. */
|
||||
module_put(THIS_MODULE);
|
||||
return error;
|
||||
goto done;
|
||||
}
|
||||
|
||||
static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
{
|
||||
struct file *filp;
|
||||
gfp_t gfp = lo->old_gfp_mask;
|
||||
struct loop_worker *pos, *worker;
|
||||
|
||||
/*
|
||||
* Flush loop_configure() and loop_change_fd(). It is acceptable for
|
||||
* loop_validate_file() to succeed, for actual clear operation has not
|
||||
* started yet.
|
||||
*/
|
||||
mutex_lock(&loop_validate_mutex);
|
||||
mutex_unlock(&loop_validate_mutex);
|
||||
/*
|
||||
* loop_validate_file() now fails because l->lo_state != Lo_bound
|
||||
* became visible.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Since this function is called upon "ioctl(LOOP_CLR_FD)" xor "close()
|
||||
* after ioctl(LOOP_CLR_FD)", it is a sign of something going wrong if
|
||||
* lo->lo_state has changed while waiting for lo->lo_mutex.
|
||||
*/
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
BUG_ON(lo->lo_state != Lo_rundown);
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
|
||||
if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
|
||||
blk_queue_write_cache(lo->lo_queue, false, false);
|
||||
|
||||
/* freeze request queue during the transition */
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
destroy_workqueue(lo->workqueue);
|
||||
spin_lock_irq(&lo->lo_work_lock);
|
||||
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
|
||||
idle_list) {
|
||||
list_del(&worker->idle_list);
|
||||
rb_erase(&worker->rb_node, &lo->worker_tree);
|
||||
css_put(worker->blkcg_css);
|
||||
kfree(worker);
|
||||
}
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
del_timer_sync(&lo->timer);
|
||||
/*
|
||||
* Freeze the request queue when unbinding on a live file descriptor and
|
||||
* thus an open device. When called from ->release we are guaranteed
|
||||
* that there is no I/O in progress already.
|
||||
*/
|
||||
if (!release)
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
spin_lock_irq(&lo->lo_lock);
|
||||
filp = lo->lo_backing_file;
|
||||
@ -1146,7 +1163,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
|
||||
mapping_set_gfp_mask(filp->f_mapping, gfp);
|
||||
/* This is safe: open() is still holding a reference. */
|
||||
module_put(THIS_MODULE);
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
if (!release)
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
|
||||
disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
|
||||
|
||||
@ -1197,11 +1215,20 @@ static int loop_clr_fd(struct loop_device *lo)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = mutex_lock_killable(&lo->lo_mutex);
|
||||
/*
|
||||
* Since lo_ioctl() is called without locks held, it is possible that
|
||||
* loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel.
|
||||
*
|
||||
* Therefore, use global lock when setting Lo_rundown state in order to
|
||||
* make sure that loop_validate_file() will fail if the "struct file"
|
||||
* which loop_configure()/loop_change_fd() found via fget() was this
|
||||
* loop device.
|
||||
*/
|
||||
err = loop_global_lock_killable(lo, true);
|
||||
if (err)
|
||||
return err;
|
||||
if (lo->lo_state != Lo_bound) {
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, true);
|
||||
return -ENXIO;
|
||||
}
|
||||
/*
|
||||
@ -1214,13 +1241,13 @@ static int loop_clr_fd(struct loop_device *lo)
|
||||
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
|
||||
* command to fail with EBUSY.
|
||||
*/
|
||||
if (atomic_read(&lo->lo_refcnt) > 1) {
|
||||
if (disk_openers(lo->lo_disk) > 1) {
|
||||
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, true);
|
||||
return 0;
|
||||
}
|
||||
lo->lo_state = Lo_rundown;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, true);
|
||||
|
||||
__loop_clr_fd(lo, false);
|
||||
return 0;
|
||||
@ -1252,15 +1279,6 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
|
||||
/* I/O need to be drained during transfer transition */
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
|
||||
/* If any pages were dirtied after invalidate_bdev(), try again */
|
||||
err = -EAGAIN;
|
||||
pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
|
||||
__func__, lo->lo_number, lo->lo_file_name,
|
||||
lo->lo_device->bd_inode->i_mapping->nrpages);
|
||||
goto out_unfreeze;
|
||||
}
|
||||
|
||||
prev_lo_flags = lo->lo_flags;
|
||||
|
||||
err = loop_set_status_from_info(lo, info);
|
||||
@ -1471,21 +1489,10 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
|
||||
invalidate_bdev(lo->lo_device);
|
||||
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
/* invalidate_bdev should have truncated all the pages */
|
||||
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
|
||||
err = -EAGAIN;
|
||||
pr_warn("%s: loop%d (%s) still has dirty pages (nrpages=%lu)\n",
|
||||
__func__, lo->lo_number, lo->lo_file_name,
|
||||
lo->lo_device->bd_inode->i_mapping->nrpages);
|
||||
goto out_unfreeze;
|
||||
}
|
||||
|
||||
blk_queue_logical_block_size(lo->lo_queue, arg);
|
||||
blk_queue_physical_block_size(lo->lo_queue, arg);
|
||||
blk_queue_io_min(lo->lo_queue, arg);
|
||||
loop_update_dio(lo);
|
||||
out_unfreeze:
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
|
||||
return err;
|
||||
@ -1715,33 +1722,15 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Block-device open hook for a loop device.
 *
 * Takes lo_mutex (killable) and, unless the device is in Lo_deleting,
 * bumps lo_refcnt.
 *
 * Returns 0 on success, the error from mutex_lock_killable() if the wait
 * was interrupted, or -ENXIO when the device is in Lo_deleting.
 */
static int lo_open(struct block_device *bdev, fmode_t mode)
{
	struct loop_device *lo = bdev->bd_disk->private_data;
	int err = mutex_lock_killable(&lo->lo_mutex);

	if (err)
		return err;

	/* err is 0 here; it only changes if the device is being deleted. */
	if (lo->lo_state != Lo_deleting)
		atomic_inc(&lo->lo_refcnt);
	else
		err = -ENXIO;

	mutex_unlock(&lo->lo_mutex);
	return err;
}
|
||||
|
||||
static void lo_release(struct gendisk *disk, fmode_t mode)
|
||||
{
|
||||
struct loop_device *lo = disk->private_data;
|
||||
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
if (atomic_dec_return(&lo->lo_refcnt))
|
||||
goto out_unlock;
|
||||
if (disk_openers(disk) > 0)
|
||||
return;
|
||||
|
||||
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
|
||||
if (lo->lo_state != Lo_bound)
|
||||
goto out_unlock;
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
|
||||
lo->lo_state = Lo_rundown;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
/*
|
||||
@ -1750,27 +1739,30 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
|
||||
*/
|
||||
__loop_clr_fd(lo, true);
|
||||
return;
|
||||
} else if (lo->lo_state == Lo_bound) {
|
||||
/*
|
||||
* Otherwise keep thread (if running) and config,
|
||||
* but flush possible ongoing bios in thread.
|
||||
*/
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
}
|
||||
|
||||
static void lo_free_disk(struct gendisk *disk)
|
||||
{
|
||||
struct loop_device *lo = disk->private_data;
|
||||
|
||||
if (lo->workqueue)
|
||||
destroy_workqueue(lo->workqueue);
|
||||
loop_free_idle_workers(lo, true);
|
||||
del_timer_sync(&lo->timer);
|
||||
mutex_destroy(&lo->lo_mutex);
|
||||
kfree(lo);
|
||||
}
|
||||
|
||||
static const struct block_device_operations lo_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = lo_open,
|
||||
.release = lo_release,
|
||||
.ioctl = lo_ioctl,
|
||||
#ifdef CONFIG_COMPAT
|
||||
.compat_ioctl = lo_compat_ioctl,
|
||||
#endif
|
||||
.free_disk = lo_free_disk,
|
||||
};
|
||||
|
||||
/*
|
||||
@ -1885,11 +1877,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
|
||||
}
|
||||
}
|
||||
|
||||
static void loop_set_timer(struct loop_device *lo)
|
||||
{
|
||||
timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT);
|
||||
}
|
||||
|
||||
static void loop_process_work(struct loop_worker *worker,
|
||||
struct list_head *cmd_list, struct loop_device *lo)
|
||||
{
|
||||
@ -1938,27 +1925,6 @@ static void loop_rootcg_workfn(struct work_struct *work)
|
||||
loop_process_work(NULL, &lo->rootcg_cmd_list, lo);
|
||||
}
|
||||
|
||||
static void loop_free_idle_workers(struct timer_list *timer)
|
||||
{
|
||||
struct loop_device *lo = container_of(timer, struct loop_device, timer);
|
||||
struct loop_worker *pos, *worker;
|
||||
|
||||
spin_lock_irq(&lo->lo_work_lock);
|
||||
list_for_each_entry_safe(worker, pos, &lo->idle_worker_list,
|
||||
idle_list) {
|
||||
if (time_is_after_jiffies(worker->last_ran_at +
|
||||
LOOP_IDLE_WORKER_TIMEOUT))
|
||||
break;
|
||||
list_del(&worker->idle_list);
|
||||
rb_erase(&worker->rb_node, &lo->worker_tree);
|
||||
css_put(worker->blkcg_css);
|
||||
kfree(worker);
|
||||
}
|
||||
if (!list_empty(&lo->idle_worker_list))
|
||||
loop_set_timer(lo);
|
||||
spin_unlock_irq(&lo->lo_work_lock);
|
||||
}
|
||||
|
||||
static const struct blk_mq_ops loop_mq_ops = {
|
||||
.queue_rq = loop_queue_rq,
|
||||
.complete = lo_complete_rq,
|
||||
@ -1974,6 +1940,9 @@ static int loop_add(int i)
|
||||
lo = kzalloc(sizeof(*lo), GFP_KERNEL);
|
||||
if (!lo)
|
||||
goto out;
|
||||
lo->worker_tree = RB_ROOT;
|
||||
INIT_LIST_HEAD(&lo->idle_worker_list);
|
||||
timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE);
|
||||
lo->lo_state = Lo_unbound;
|
||||
|
||||
err = mutex_lock_killable(&loop_ctl_mutex);
|
||||
@ -2043,11 +2012,12 @@ static int loop_add(int i)
|
||||
*/
|
||||
if (!part_shift)
|
||||
disk->flags |= GENHD_FL_NO_PART;
|
||||
atomic_set(&lo->lo_refcnt, 0);
|
||||
mutex_init(&lo->lo_mutex);
|
||||
lo->lo_number = i;
|
||||
spin_lock_init(&lo->lo_lock);
|
||||
spin_lock_init(&lo->lo_work_lock);
|
||||
INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn);
|
||||
INIT_LIST_HEAD(&lo->rootcg_cmd_list);
|
||||
disk->major = LOOP_MAJOR;
|
||||
disk->first_minor = i << part_shift;
|
||||
disk->minors = 1 << part_shift;
|
||||
@ -2087,15 +2057,14 @@ static void loop_remove(struct loop_device *lo)
|
||||
{
|
||||
/* Make this loop device unreachable from pathname. */
|
||||
del_gendisk(lo->lo_disk);
|
||||
blk_cleanup_disk(lo->lo_disk);
|
||||
blk_cleanup_queue(lo->lo_disk->queue);
|
||||
blk_mq_free_tag_set(&lo->tag_set);
|
||||
|
||||
mutex_lock(&loop_ctl_mutex);
|
||||
idr_remove(&loop_index_idr, lo->lo_number);
|
||||
mutex_unlock(&loop_ctl_mutex);
|
||||
/* There is no route which can find this loop device. */
|
||||
mutex_destroy(&lo->lo_mutex);
|
||||
kfree(lo);
|
||||
|
||||
put_disk(lo->lo_disk);
|
||||
}
|
||||
|
||||
static void loop_probe(dev_t dev)
|
||||
@ -2134,13 +2103,12 @@ static int loop_control_remove(int idx)
|
||||
ret = mutex_lock_killable(&lo->lo_mutex);
|
||||
if (ret)
|
||||
goto mark_visible;
|
||||
if (lo->lo_state != Lo_unbound ||
|
||||
atomic_read(&lo->lo_refcnt) > 0) {
|
||||
if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) {
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
ret = -EBUSY;
|
||||
goto mark_visible;
|
||||
}
|
||||
/* Mark this loop device no longer open()-able. */
|
||||
/* Mark this loop device as no more bound, but not quite unbound yet */
|
||||
lo->lo_state = Lo_deleting;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
|
||||
|
@ -1,72 +0,0 @@
|
||||
/*
|
||||
* loop.h
|
||||
*
|
||||
* Written by Theodore Ts'o, 3/29/93.
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
||||
* permitted under the GNU General Public License.
|
||||
*/
|
||||
#ifndef _LINUX_LOOP_H
|
||||
#define _LINUX_LOOP_H
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/blk-mq.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <uapi/linux/loop.h>
|
||||
|
||||
/* Possible states of device */
|
||||
enum {
	Lo_unbound	= 0,	/* state assigned to a freshly allocated device */
	Lo_bound	= 1,	/* required before a clear-fd request is accepted */
	Lo_rundown	= 2,	/* set just before the backing file is torn down */
	Lo_deleting	= 3,	/* device is being removed; open() returns -ENXIO */
};
|
||||
|
||||
struct loop_func_table;
|
||||
|
||||
struct loop_device {
|
||||
int lo_number;
|
||||
atomic_t lo_refcnt;
|
||||
loff_t lo_offset;
|
||||
loff_t lo_sizelimit;
|
||||
int lo_flags;
|
||||
char lo_file_name[LO_NAME_SIZE];
|
||||
|
||||
struct file * lo_backing_file;
|
||||
struct block_device *lo_device;
|
||||
|
||||
gfp_t old_gfp_mask;
|
||||
|
||||
spinlock_t lo_lock;
|
||||
int lo_state;
|
||||
spinlock_t lo_work_lock;
|
||||
struct workqueue_struct *workqueue;
|
||||
struct work_struct rootcg_work;
|
||||
struct list_head rootcg_cmd_list;
|
||||
struct list_head idle_worker_list;
|
||||
struct rb_root worker_tree;
|
||||
struct timer_list timer;
|
||||
bool use_dio;
|
||||
bool sysfs_inited;
|
||||
|
||||
struct request_queue *lo_queue;
|
||||
struct blk_mq_tag_set tag_set;
|
||||
struct gendisk *lo_disk;
|
||||
struct mutex lo_mutex;
|
||||
bool idr_visible;
|
||||
};
|
||||
|
||||
struct loop_cmd {
|
||||
struct list_head list_entry;
|
||||
bool use_aio; /* use AIO interface to handle I/O */
|
||||
atomic_t ref; /* only for aio */
|
||||
long ret;
|
||||
struct kiocb iocb;
|
||||
struct bio_vec *bvec;
|
||||
struct cgroup_subsys_state *blkcg_css;
|
||||
struct cgroup_subsys_state *memcg_css;
|
||||
};
|
||||
|
||||
#endif
|
@ -2729,7 +2729,7 @@ static int mtip_dma_alloc(struct driver_data *dd)
|
||||
{
|
||||
struct mtip_port *port = dd->port;
|
||||
|
||||
/* Allocate dma memory for RX Fis, Identify, and Sector Bufffer */
|
||||
/* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
|
||||
port->block1 =
|
||||
dma_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
|
||||
&port->block1_dma, GFP_KERNEL);
|
||||
|
@ -333,7 +333,6 @@ static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize,
|
||||
|
||||
if (nbd->config->flags & NBD_FLAG_SEND_TRIM) {
|
||||
nbd->disk->queue->limits.discard_granularity = blksize;
|
||||
nbd->disk->queue->limits.discard_alignment = blksize;
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX);
|
||||
}
|
||||
blk_queue_logical_block_size(nbd->disk->queue, blksize);
|
||||
@ -947,11 +946,15 @@ static int wait_for_reconnect(struct nbd_device *nbd)
|
||||
struct nbd_config *config = nbd->config;
|
||||
if (!config->dead_conn_timeout)
|
||||
return 0;
|
||||
if (test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags))
|
||||
|
||||
if (!wait_event_timeout(config->conn_wait,
|
||||
test_bit(NBD_RT_DISCONNECTED,
|
||||
&config->runtime_flags) ||
|
||||
atomic_read(&config->live_connections) > 0,
|
||||
config->dead_conn_timeout))
|
||||
return 0;
|
||||
return wait_event_timeout(config->conn_wait,
|
||||
atomic_read(&config->live_connections) > 0,
|
||||
config->dead_conn_timeout) > 0;
|
||||
|
||||
return !test_bit(NBD_RT_DISCONNECTED, &config->runtime_flags);
|
||||
}
|
||||
|
||||
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
|
||||
@ -1217,11 +1220,11 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
static void nbd_bdev_reset(struct block_device *bdev)
|
||||
static void nbd_bdev_reset(struct nbd_device *nbd)
|
||||
{
|
||||
if (bdev->bd_openers > 1)
|
||||
if (disk_openers(nbd->disk) > 1)
|
||||
return;
|
||||
set_capacity(bdev->bd_disk, 0);
|
||||
set_capacity(nbd->disk, 0);
|
||||
}
|
||||
|
||||
static void nbd_parse_flags(struct nbd_device *nbd)
|
||||
@ -1316,7 +1319,6 @@ static void nbd_config_put(struct nbd_device *nbd)
|
||||
|
||||
nbd->tag_set.timeout = 0;
|
||||
nbd->disk->queue->limits.discard_granularity = 0;
|
||||
nbd->disk->queue->limits.discard_alignment = 0;
|
||||
blk_queue_max_discard_sectors(nbd->disk->queue, 0);
|
||||
|
||||
mutex_unlock(&nbd->config_lock);
|
||||
@ -1386,7 +1388,7 @@ static int nbd_start_device(struct nbd_device *nbd)
|
||||
return nbd_set_size(nbd, config->bytesize, nbd_blksize(config));
|
||||
}
|
||||
|
||||
static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev)
|
||||
static int nbd_start_device_ioctl(struct nbd_device *nbd)
|
||||
{
|
||||
struct nbd_config *config = nbd->config;
|
||||
int ret;
|
||||
@ -1405,7 +1407,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
|
||||
flush_workqueue(nbd->recv_workq);
|
||||
|
||||
mutex_lock(&nbd->config_lock);
|
||||
nbd_bdev_reset(bdev);
|
||||
nbd_bdev_reset(nbd);
|
||||
/* user requested, ignore socket errors */
|
||||
if (test_bit(NBD_RT_DISCONNECT_REQUESTED, &config->runtime_flags))
|
||||
ret = 0;
|
||||
@ -1419,7 +1421,7 @@ static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
|
||||
{
|
||||
sock_shutdown(nbd);
|
||||
__invalidate_device(bdev, true);
|
||||
nbd_bdev_reset(bdev);
|
||||
nbd_bdev_reset(nbd);
|
||||
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
|
||||
&nbd->config->runtime_flags))
|
||||
nbd_config_put(nbd);
|
||||
@ -1465,7 +1467,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
||||
config->flags = arg;
|
||||
return 0;
|
||||
case NBD_DO_IT:
|
||||
return nbd_start_device_ioctl(nbd, bdev);
|
||||
return nbd_start_device_ioctl(nbd);
|
||||
case NBD_CLEAR_QUE:
|
||||
/*
|
||||
* This is for compatibility only. The queue is always cleared
|
||||
@ -1576,7 +1578,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode)
|
||||
struct nbd_device *nbd = disk->private_data;
|
||||
|
||||
if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
|
||||
disk->part0->bd_openers == 0)
|
||||
disk_openers(disk) == 0)
|
||||
nbd_disconnect_and_put(nbd);
|
||||
|
||||
nbd_config_put(nbd);
|
||||
@ -1781,7 +1783,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
|
||||
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
|
||||
disk->queue->limits.discard_granularity = 0;
|
||||
disk->queue->limits.discard_alignment = 0;
|
||||
blk_queue_max_discard_sectors(disk->queue, 0);
|
||||
blk_queue_max_segment_size(disk->queue, UINT_MAX);
|
||||
blk_queue_max_segments(disk->queue, USHRT_MAX);
|
||||
@ -2079,6 +2080,7 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
|
||||
mutex_lock(&nbd->config_lock);
|
||||
nbd_disconnect(nbd);
|
||||
sock_shutdown(nbd);
|
||||
wake_up(&nbd->config->conn_wait);
|
||||
/*
|
||||
* Make sure recv thread has finished, we can safely call nbd_clear_que()
|
||||
* to cancel the inflight I/Os.
|
||||
|
@ -11,6 +11,9 @@
|
||||
#include <linux/init.h>
|
||||
#include "null_blk.h"
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "null_blk: " fmt
|
||||
|
||||
#define FREE_BATCH 16
|
||||
|
||||
#define TICKS_PER_SEC 50ULL
|
||||
@ -232,6 +235,7 @@ static struct nullb_device *null_alloc_dev(void);
|
||||
static void null_free_dev(struct nullb_device *dev);
|
||||
static void null_del_dev(struct nullb *nullb);
|
||||
static int null_add_dev(struct nullb_device *dev);
|
||||
static struct nullb *null_find_dev_by_name(const char *name);
|
||||
static void null_free_device_storage(struct nullb_device *dev, bool is_cache);
|
||||
|
||||
static inline struct nullb_device *to_nullb_device(struct config_item *item)
|
||||
@ -560,6 +564,9 @@ config_item *nullb_group_make_item(struct config_group *group, const char *name)
|
||||
{
|
||||
struct nullb_device *dev;
|
||||
|
||||
if (null_find_dev_by_name(name))
|
||||
return ERR_PTR(-EEXIST);
|
||||
|
||||
dev = null_alloc_dev();
|
||||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@ -1765,7 +1772,6 @@ static void null_config_discard(struct nullb *nullb)
|
||||
}
|
||||
|
||||
nullb->q->limits.discard_granularity = nullb->dev->blocksize;
|
||||
nullb->q->limits.discard_alignment = nullb->dev->blocksize;
|
||||
blk_queue_max_discard_sectors(nullb->q, UINT_MAX >> 9);
|
||||
}
|
||||
|
||||
@ -2060,7 +2066,13 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
|
||||
null_config_discard(nullb);
|
||||
|
||||
sprintf(nullb->disk_name, "nullb%d", nullb->index);
|
||||
if (config_item_name(&dev->item)) {
|
||||
/* Use configfs dir name as the device name */
|
||||
snprintf(nullb->disk_name, sizeof(nullb->disk_name),
|
||||
"%s", config_item_name(&dev->item));
|
||||
} else {
|
||||
sprintf(nullb->disk_name, "nullb%d", nullb->index);
|
||||
}
|
||||
|
||||
rv = null_gendisk_register(nullb);
|
||||
if (rv)
|
||||
@ -2070,6 +2082,8 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
list_add_tail(&nullb->list, &nullb_list);
|
||||
mutex_unlock(&lock);
|
||||
|
||||
pr_info("disk %s created\n", nullb->disk_name);
|
||||
|
||||
return 0;
|
||||
out_cleanup_zone:
|
||||
null_free_zoned_dev(dev);
|
||||
@ -2087,12 +2101,53 @@ static int null_add_dev(struct nullb_device *dev)
|
||||
return rv;
|
||||
}
|
||||
|
||||
static struct nullb *null_find_dev_by_name(const char *name)
|
||||
{
|
||||
struct nullb *nullb = NULL, *nb;
|
||||
|
||||
mutex_lock(&lock);
|
||||
list_for_each_entry(nb, &nullb_list, list) {
|
||||
if (strcmp(nb->disk_name, name) == 0) {
|
||||
nullb = nb;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&lock);
|
||||
|
||||
return nullb;
|
||||
}
|
||||
|
||||
/*
 * Allocate a nullb_device and register it.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
 * null_add_dev(), in which case the freshly allocated device is freed again.
 */
static int null_create_dev(void)
{
	struct nullb_device *dev = null_alloc_dev();
	int ret;

	if (!dev)
		return -ENOMEM;

	ret = null_add_dev(dev);
	if (ret)
		null_free_dev(dev);

	/* ret is 0 on the success path. */
	return ret;
}
|
||||
|
||||
static void null_destroy_dev(struct nullb *nullb)
|
||||
{
|
||||
struct nullb_device *dev = nullb->dev;
|
||||
|
||||
null_del_dev(nullb);
|
||||
null_free_dev(dev);
|
||||
}
|
||||
|
||||
static int __init null_init(void)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned int i;
|
||||
struct nullb *nullb;
|
||||
struct nullb_device *dev;
|
||||
|
||||
if (g_bs > PAGE_SIZE) {
|
||||
pr_warn("invalid block size\n");
|
||||
@ -2112,19 +2167,21 @@ static int __init null_init(void)
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_RQ) {
|
||||
pr_err("legacy IO path no longer available\n");
|
||||
pr_err("legacy IO path is no longer available\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
|
||||
if (g_submit_queues != nr_online_nodes) {
|
||||
pr_warn("submit_queues param is set to %u.\n",
|
||||
nr_online_nodes);
|
||||
nr_online_nodes);
|
||||
g_submit_queues = nr_online_nodes;
|
||||
}
|
||||
} else if (g_submit_queues > nr_cpu_ids)
|
||||
} else if (g_submit_queues > nr_cpu_ids) {
|
||||
g_submit_queues = nr_cpu_ids;
|
||||
else if (g_submit_queues <= 0)
|
||||
} else if (g_submit_queues <= 0) {
|
||||
g_submit_queues = 1;
|
||||
}
|
||||
|
||||
if (g_queue_mode == NULL_Q_MQ && shared_tags) {
|
||||
ret = null_init_tag_set(NULL, &tag_set);
|
||||
@ -2148,16 +2205,9 @@ static int __init null_init(void)
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_devices; i++) {
|
||||
dev = null_alloc_dev();
|
||||
if (!dev) {
|
||||
ret = -ENOMEM;
|
||||
ret = null_create_dev();
|
||||
if (ret)
|
||||
goto err_dev;
|
||||
}
|
||||
ret = null_add_dev(dev);
|
||||
if (ret) {
|
||||
null_free_dev(dev);
|
||||
goto err_dev;
|
||||
}
|
||||
}
|
||||
|
||||
pr_info("module loaded\n");
|
||||
@ -2166,9 +2216,7 @@ static int __init null_init(void)
|
||||
err_dev:
|
||||
while (!list_empty(&nullb_list)) {
|
||||
nullb = list_entry(nullb_list.next, struct nullb, list);
|
||||
dev = nullb->dev;
|
||||
null_del_dev(nullb);
|
||||
null_free_dev(dev);
|
||||
null_destroy_dev(nullb);
|
||||
}
|
||||
unregister_blkdev(null_major, "nullb");
|
||||
err_conf:
|
||||
@ -2189,12 +2237,8 @@ static void __exit null_exit(void)
|
||||
|
||||
mutex_lock(&lock);
|
||||
while (!list_empty(&nullb_list)) {
|
||||
struct nullb_device *dev;
|
||||
|
||||
nullb = list_entry(nullb_list.next, struct nullb, list);
|
||||
dev = nullb->dev;
|
||||
null_del_dev(nullb);
|
||||
null_free_dev(dev);
|
||||
null_destroy_dev(nullb);
|
||||
}
|
||||
mutex_unlock(&lock);
|
||||
|
||||
|
@ -16,13 +16,15 @@
|
||||
#include <linux/mutex.h>
|
||||
|
||||
struct nullb_cmd {
|
||||
struct request *rq;
|
||||
struct bio *bio;
|
||||
union {
|
||||
struct request *rq;
|
||||
struct bio *bio;
|
||||
};
|
||||
unsigned int tag;
|
||||
blk_status_t error;
|
||||
bool fake_timeout;
|
||||
struct nullb_queue *nq;
|
||||
struct hrtimer timer;
|
||||
bool fake_timeout;
|
||||
};
|
||||
|
||||
struct nullb_queue {
|
||||
|
@ -6,6 +6,9 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace.h"
|
||||
|
||||
#undef pr_fmt
|
||||
#define pr_fmt(fmt) "null_blk: " fmt
|
||||
|
||||
static inline sector_t mb_to_sects(unsigned long mb)
|
||||
{
|
||||
return ((sector_t)mb * SZ_1M) >> SECTOR_SHIFT;
|
||||
@ -75,8 +78,8 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
|
||||
dev->zone_capacity = dev->zone_size;
|
||||
|
||||
if (dev->zone_capacity > dev->zone_size) {
|
||||
pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n",
|
||||
dev->zone_capacity, dev->zone_size);
|
||||
pr_err("zone capacity (%lu MB) larger than zone size (%lu MB)\n",
|
||||
dev->zone_capacity, dev->zone_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,7 @@ static int rnbd_client_major;
|
||||
static DEFINE_IDA(index_ida);
|
||||
static DEFINE_MUTEX(sess_lock);
|
||||
static LIST_HEAD(sess_list);
|
||||
static struct workqueue_struct *rnbd_clt_wq;
|
||||
|
||||
/*
|
||||
* Maximum number of partitions an instance can have.
|
||||
@ -1759,12 +1760,12 @@ static void rnbd_destroy_sessions(void)
|
||||
* procedure takes minutes.
|
||||
*/
|
||||
INIT_WORK(&dev->unmap_on_rmmod_work, unmap_device_work);
|
||||
queue_work(system_long_wq, &dev->unmap_on_rmmod_work);
|
||||
queue_work(rnbd_clt_wq, &dev->unmap_on_rmmod_work);
|
||||
}
|
||||
rnbd_clt_put_sess(sess);
|
||||
}
|
||||
/* Wait for all scheduled unmap works */
|
||||
flush_workqueue(system_long_wq);
|
||||
flush_workqueue(rnbd_clt_wq);
|
||||
WARN_ON(!list_empty(&sess_list));
|
||||
}
|
||||
|
||||
@ -1789,6 +1790,14 @@ static int __init rnbd_client_init(void)
|
||||
pr_err("Failed to load module, creating sysfs device files failed, err: %d\n",
|
||||
err);
|
||||
unregister_blkdev(rnbd_client_major, "rnbd");
|
||||
return err;
|
||||
}
|
||||
rnbd_clt_wq = alloc_workqueue("rnbd_clt_wq", 0, 0);
|
||||
if (!rnbd_clt_wq) {
|
||||
pr_err("Failed to load module, alloc_workqueue failed.\n");
|
||||
rnbd_clt_destroy_sysfs_files();
|
||||
unregister_blkdev(rnbd_client_major, "rnbd");
|
||||
err = -ENOMEM;
|
||||
}
|
||||
|
||||
return err;
|
||||
@ -1799,6 +1808,7 @@ static void __exit rnbd_client_exit(void)
|
||||
rnbd_destroy_sessions();
|
||||
unregister_blkdev(rnbd_client_major, "rnbd");
|
||||
ida_destroy(&index_ida);
|
||||
destroy_workqueue(rnbd_clt_wq);
|
||||
}
|
||||
|
||||
module_init(rnbd_client_init);
|
||||
|
@ -59,7 +59,7 @@ static inline int rnbd_dev_get_discard_granularity(const struct rnbd_dev *dev)
|
||||
|
||||
static inline int rnbd_dev_get_discard_alignment(const struct rnbd_dev *dev)
|
||||
{
|
||||
return bdev_get_queue(dev->bdev)->limits.discard_alignment;
|
||||
return bdev_discard_alignment(dev->bdev);
|
||||
}
|
||||
|
||||
#endif /* RNBD_SRV_DEV_H */
|
||||
|
@ -867,11 +867,12 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||
blk_queue_io_opt(q, blk_size * opt_io_size);
|
||||
|
||||
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
|
||||
q->limits.discard_granularity = blk_size;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
discard_sector_alignment, &v);
|
||||
q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;
|
||||
if (v)
|
||||
q->limits.discard_granularity = v << SECTOR_SHIFT;
|
||||
else
|
||||
q->limits.discard_granularity = blk_size;
|
||||
|
||||
virtio_cread(vdev, struct virtio_blk_config,
|
||||
max_discard_sectors, &v);
|
||||
|
@ -575,7 +575,6 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
|
||||
int err;
|
||||
int state = 0;
|
||||
struct block_device *bdev = be->blkif->vbd.bdev;
|
||||
struct request_queue *q = bdev_get_queue(bdev);
|
||||
|
||||
if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
|
||||
return;
|
||||
@ -583,14 +582,14 @@ static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info
|
||||
if (bdev_max_discard_sectors(bdev)) {
|
||||
err = xenbus_printf(xbt, dev->nodename,
|
||||
"discard-granularity", "%u",
|
||||
q->limits.discard_granularity);
|
||||
bdev_discard_granularity(bdev));
|
||||
if (err) {
|
||||
dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
|
||||
return;
|
||||
}
|
||||
err = xenbus_printf(xbt, dev->nodename,
|
||||
"discard-alignment", "%u",
|
||||
q->limits.discard_alignment);
|
||||
bdev_discard_alignment(bdev));
|
||||
if (err) {
|
||||
dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
|
||||
return;
|
||||
|
@ -1787,7 +1787,7 @@ static ssize_t reset_store(struct device *dev,
|
||||
int ret;
|
||||
unsigned short do_reset;
|
||||
struct zram *zram;
|
||||
struct block_device *bdev;
|
||||
struct gendisk *disk;
|
||||
|
||||
ret = kstrtou16(buf, 10, &do_reset);
|
||||
if (ret)
|
||||
@ -1797,26 +1797,26 @@ static ssize_t reset_store(struct device *dev,
|
||||
return -EINVAL;
|
||||
|
||||
zram = dev_to_zram(dev);
|
||||
bdev = zram->disk->part0;
|
||||
disk = zram->disk;
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
mutex_lock(&disk->open_mutex);
|
||||
/* Do not reset an active device or claimed device */
|
||||
if (bdev->bd_openers || zram->claim) {
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
if (disk_openers(disk) || zram->claim) {
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/* From now on, anyone can't open /dev/zram[0-9] */
|
||||
zram->claim = true;
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
|
||||
/* Make sure all the pending I/O are finished */
|
||||
sync_blockdev(bdev);
|
||||
sync_blockdev(disk->part0);
|
||||
zram_reset_device(zram);
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
mutex_lock(&disk->open_mutex);
|
||||
zram->claim = false;
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_unlock(&disk->open_mutex);
|
||||
|
||||
return len;
|
||||
}
|
||||
@ -1987,19 +1987,18 @@ static int zram_add(void)
|
||||
|
||||
static int zram_remove(struct zram *zram)
|
||||
{
|
||||
struct block_device *bdev = zram->disk->part0;
|
||||
bool claimed;
|
||||
|
||||
mutex_lock(&bdev->bd_disk->open_mutex);
|
||||
if (bdev->bd_openers) {
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_lock(&zram->disk->open_mutex);
|
||||
if (disk_openers(zram->disk)) {
|
||||
mutex_unlock(&zram->disk->open_mutex);
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
claimed = zram->claim;
|
||||
if (!claimed)
|
||||
zram->claim = true;
|
||||
mutex_unlock(&bdev->bd_disk->open_mutex);
|
||||
mutex_unlock(&zram->disk->open_mutex);
|
||||
|
||||
zram_debugfs_unregister(zram);
|
||||
|
||||
@ -2011,7 +2010,7 @@ static int zram_remove(struct zram *zram)
|
||||
;
|
||||
} else {
|
||||
/* Make sure all the pending I/O are finished */
|
||||
sync_blockdev(bdev);
|
||||
sync_blockdev(zram->disk->part0);
|
||||
zram_reset_device(zram);
|
||||
}
|
||||
|
||||
|
@ -1001,7 +1001,7 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
||||
blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
|
||||
blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);
|
||||
|
||||
limits->discard_alignment = DMZ_BLOCK_SIZE;
|
||||
limits->discard_alignment = 0;
|
||||
limits->discard_granularity = DMZ_BLOCK_SIZE;
|
||||
limits->max_discard_sectors = chunk_sectors;
|
||||
limits->max_hw_discard_sectors = chunk_sectors;
|
||||
|
@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
|
||||
write_behind = le32_to_cpu(sb->write_behind);
|
||||
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
|
||||
/* Setup nodes/clustername only if bitmap version is
|
||||
* cluster-compatible
|
||||
*/
|
||||
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
|
||||
nodes = le32_to_cpu(sb->nodes);
|
||||
strlcpy(bitmap->mddev->bitmap_info.cluster_name,
|
||||
sb->cluster_name, 64);
|
||||
}
|
||||
|
||||
/* verify that the bitmap-specific fields are valid */
|
||||
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
|
||||
@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup nodes/clustername only if bitmap version is
|
||||
* cluster-compatible
|
||||
*/
|
||||
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
|
||||
nodes = le32_to_cpu(sb->nodes);
|
||||
strscpy(bitmap->mddev->bitmap_info.cluster_name,
|
||||
sb->cluster_name, 64);
|
||||
}
|
||||
|
||||
/* keep the array size field of the bitmap superblock up to date */
|
||||
sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
|
||||
|
||||
@ -695,14 +697,13 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
|
||||
set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
|
||||
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
|
||||
strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
|
||||
err = 0;
|
||||
|
||||
out:
|
||||
kunmap_atomic(sb);
|
||||
/* Assigning chunksize is required for "re_read" */
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
|
||||
/* Assigning chunksize is required for "re_read" */
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
err = md_setup_cluster(bitmap->mddev, nodes);
|
||||
if (err) {
|
||||
pr_warn("%s: Could not setup cluster service (%d)\n",
|
||||
@ -713,18 +714,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
|
||||
goto re_read;
|
||||
}
|
||||
|
||||
|
||||
out_no_sb:
|
||||
if (test_bit(BITMAP_STALE, &bitmap->flags))
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
|
||||
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
|
||||
bitmap->mddev->bitmap_info.nodes = nodes;
|
||||
if (bitmap->mddev->bitmap_info.space == 0 ||
|
||||
bitmap->mddev->bitmap_info.space > sectors_reserved)
|
||||
bitmap->mddev->bitmap_info.space = sectors_reserved;
|
||||
if (err) {
|
||||
if (err == 0) {
|
||||
if (test_bit(BITMAP_STALE, &bitmap->flags))
|
||||
bitmap->events_cleared = bitmap->mddev->events;
|
||||
bitmap->mddev->bitmap_info.chunksize = chunksize;
|
||||
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
|
||||
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
|
||||
bitmap->mddev->bitmap_info.nodes = nodes;
|
||||
if (bitmap->mddev->bitmap_info.space == 0 ||
|
||||
bitmap->mddev->bitmap_info.space > sectors_reserved)
|
||||
bitmap->mddev->bitmap_info.space = sectors_reserved;
|
||||
} else {
|
||||
md_bitmap_print_sb(bitmap);
|
||||
if (bitmap->cluster_slot < 0)
|
||||
md_cluster_stop(bitmap->mddev);
|
||||
|
@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
|
||||
pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
|
||||
goto out_err;
|
||||
}
|
||||
strlcpy(res->name, name, namelen + 1);
|
||||
strscpy(res->name, name, namelen + 1);
|
||||
if (with_lvb) {
|
||||
res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
|
||||
if (!res->lksb.sb_lvbptr) {
|
||||
|
@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)
|
||||
|
||||
static bool does_sb_need_changing(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *rdev = NULL, *iter;
|
||||
struct mdp_superblock_1 *sb;
|
||||
int role;
|
||||
|
||||
/* Find a good rdev */
|
||||
rdev_for_each(rdev, mddev)
|
||||
if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
|
||||
rdev_for_each(iter, mddev)
|
||||
if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
|
||||
rdev = iter;
|
||||
break;
|
||||
}
|
||||
|
||||
/* No good device found. */
|
||||
if (!rdev)
|
||||
@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
|
||||
rdev_for_each(rdev, mddev) {
|
||||
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
|
||||
/* Device activated? */
|
||||
if (role == 0xffff && rdev->raid_disk >=0 &&
|
||||
if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
|
||||
!test_bit(Faulty, &rdev->flags))
|
||||
return true;
|
||||
/* Device turned faulty? */
|
||||
if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
|
||||
if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
|
||||
|
||||
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
|
||||
md_error(rdev->mddev, rdev);
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
err = 0;
|
||||
else
|
||||
|
||||
if (test_bit(MD_BROKEN, &rdev->mddev->flags))
|
||||
err = -EBUSY;
|
||||
else
|
||||
err = 0;
|
||||
} else if (cmd_match(buf, "remove")) {
|
||||
if (rdev->mddev->pers) {
|
||||
clear_bit(Blocked, &rdev->flags);
|
||||
@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
|
||||
oldpriv = mddev->private;
|
||||
mddev->pers = pers;
|
||||
mddev->private = priv;
|
||||
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
mddev->level = mddev->new_level;
|
||||
mddev->layout = mddev->new_layout;
|
||||
mddev->chunk_sectors = mddev->new_chunk_sectors;
|
||||
@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
|
||||
* like active, but no writes have been seen for a while (100msec).
|
||||
*
|
||||
* broken
|
||||
* RAID0/LINEAR-only: same as clean, but array is missing a member.
|
||||
* It's useful because RAID0/LINEAR mounted-arrays aren't stopped
|
||||
* when a member is gone, so this state will at least alert the
|
||||
* user that something is wrong.
|
||||
* The array has failed. This is useful because mounted arrays aren't stopped
|
||||
* when the array fails, so this state will at least alert the user that
|
||||
* something is wrong.
|
||||
*/
|
||||
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
|
||||
write_pending, active_idle, broken, bad_word};
|
||||
@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
|
||||
len--;
|
||||
if (len >= DISK_NAME_LEN)
|
||||
return -E2BIG;
|
||||
strlcpy(buf, val, len+1);
|
||||
strscpy(buf, val, len+1);
|
||||
if (strncmp(buf, "md_", 3) == 0)
|
||||
return md_alloc(0, buf);
|
||||
if (strncmp(buf, "md", 2) == 0 &&
|
||||
@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
|
||||
mddev->level = pers->level;
|
||||
mddev->new_level = pers->level;
|
||||
}
|
||||
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
|
||||
|
||||
if (mddev->reshape_position != MaxSector &&
|
||||
pers->start_reshape == NULL) {
|
||||
@ -7443,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
|
||||
err = -ENODEV;
|
||||
else {
|
||||
md_error(mddev, rdev);
|
||||
if (!test_bit(Faulty, &rdev->flags))
|
||||
if (test_bit(MD_BROKEN, &mddev->flags))
|
||||
err = -EBUSY;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@ -7984,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
|
||||
if (!mddev->pers || !mddev->pers->error_handler)
|
||||
return;
|
||||
mddev->pers->error_handler(mddev,rdev);
|
||||
if (mddev->degraded)
|
||||
mddev->pers->error_handler(mddev, rdev);
|
||||
|
||||
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
if (!test_bit(MD_BROKEN, &mddev->flags)) {
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
md_wakeup_thread(mddev->thread);
|
||||
}
|
||||
if (mddev->event_work.func)
|
||||
queue_work(md_misc_wq, &mddev->event_work);
|
||||
md_new_event();
|
||||
@ -9670,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
|
||||
role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);
|
||||
|
||||
if (test_bit(Candidate, &rdev2->flags)) {
|
||||
if (role == 0xfffe) {
|
||||
if (role == MD_DISK_ROLE_FAULTY) {
|
||||
pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
|
||||
md_kick_rdev_from_array(rdev2);
|
||||
continue;
|
||||
@ -9683,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
|
||||
/*
|
||||
* got activated except reshape is happening.
|
||||
*/
|
||||
if (rdev2->raid_disk == -1 && role != 0xffff &&
|
||||
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
|
||||
!(le32_to_cpu(sb->feature_map) &
|
||||
MD_FEATURE_RESHAPE_ACTIVE)) {
|
||||
rdev2->saved_raid_disk = role;
|
||||
@ -9700,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
|
||||
* as faulty. The recovery is performed by the
|
||||
* one who initiated the error.
|
||||
*/
|
||||
if ((role == 0xfffe) || (role == 0xfffd)) {
|
||||
if (role == MD_DISK_ROLE_FAULTY ||
|
||||
role == MD_DISK_ROLE_JOURNAL) {
|
||||
md_error(mddev, rdev2);
|
||||
clear_bit(Blocked, &rdev2->flags);
|
||||
}
|
||||
@ -9790,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)
|
||||
|
||||
void md_reload_sb(struct mddev *mddev, int nr)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
struct md_rdev *rdev = NULL, *iter;
|
||||
int err;
|
||||
|
||||
/* Find the rdev */
|
||||
rdev_for_each_rcu(rdev, mddev) {
|
||||
if (rdev->desc_nr == nr)
|
||||
rdev_for_each_rcu(iter, mddev) {
|
||||
if (iter->desc_nr == nr) {
|
||||
rdev = iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!rdev || rdev->desc_nr != nr) {
|
||||
if (!rdev) {
|
||||
pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
|
||||
return;
|
||||
}
|
||||
|
@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
|
||||
int is_new);
|
||||
struct md_cluster_info;
|
||||
|
||||
/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
|
||||
/**
|
||||
* enum mddev_flags - md device flags.
|
||||
* @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
|
||||
* @MD_CLOSING: If set, we are closing the array, do not open it then.
|
||||
* @MD_JOURNAL_CLEAN: A raid with journal is already clean.
|
||||
* @MD_HAS_JOURNAL: The raid array has journal feature set.
|
||||
* @MD_CLUSTER_RESYNC_LOCKED: cluster raid only; this node has already taken the
|
||||
* resync lock and needs to release it.
|
||||
* @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
|
||||
* calls to md_error() will never cause the array to
|
||||
* become failed.
|
||||
* @MD_HAS_PPL: The raid array has PPL feature set.
|
||||
* @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
|
||||
* @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
|
||||
* without taking reconfig_mutex.
|
||||
* @MD_UPDATING_SB: md_check_recovery is updating the metadata without
|
||||
* explicitly holding reconfig_mutex.
|
||||
* @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
|
||||
* array is ready yet.
|
||||
* @MD_BROKEN: This is used to stop writes and mark array as failed.
|
||||
*
|
||||
* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
|
||||
*/
|
||||
enum mddev_flags {
|
||||
MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
|
||||
MD_CLOSING, /* If set, we are closing the array, do not open
|
||||
* it then */
|
||||
MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
|
||||
MD_HAS_JOURNAL, /* The raid array has journal feature set */
|
||||
MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
|
||||
* already took resync lock, need to
|
||||
* release the lock */
|
||||
MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
|
||||
* supported as calls to md_error() will
|
||||
* never cause the array to become failed.
|
||||
*/
|
||||
MD_HAS_PPL, /* The raid array has PPL feature set */
|
||||
MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
|
||||
MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
|
||||
* the metadata without taking reconfig_mutex.
|
||||
*/
|
||||
MD_UPDATING_SB, /* md_check_recovery is updating the metadata
|
||||
* without explicitly holding reconfig_mutex.
|
||||
*/
|
||||
MD_NOT_READY, /* do_md_run() is active, so 'array_state'
|
||||
* must not report that array is ready yet
|
||||
*/
|
||||
MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
|
||||
* I/O in case an array member is gone/failed.
|
||||
*/
|
||||
MD_ARRAY_FIRST_USE,
|
||||
MD_CLOSING,
|
||||
MD_JOURNAL_CLEAN,
|
||||
MD_HAS_JOURNAL,
|
||||
MD_CLUSTER_RESYNC_LOCKED,
|
||||
MD_FAILFAST_SUPPORTED,
|
||||
MD_HAS_PPL,
|
||||
MD_HAS_MULTIPLE_PPLS,
|
||||
MD_ALLOW_SB_UPDATE,
|
||||
MD_UPDATING_SB,
|
||||
MD_NOT_READY,
|
||||
MD_BROKEN,
|
||||
};
|
||||
|
||||
enum mddev_sb_flags {
|
||||
|
@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
pr_debug("md/raid0:%s: FINAL %d zones\n",
|
||||
mdname(mddev), conf->nr_strip_zones);
|
||||
|
||||
if (conf->nr_strip_zones == 1) {
|
||||
conf->layout = RAID0_ORIG_LAYOUT;
|
||||
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
|
||||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = mddev->layout;
|
||||
} else if (default_layout == RAID0_ORIG_LAYOUT ||
|
||||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = default_layout;
|
||||
} else {
|
||||
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
|
||||
mdname(mddev));
|
||||
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
|
||||
err = -ENOTSUPP;
|
||||
goto abort;
|
||||
}
|
||||
/*
|
||||
* now since we have the hard sector sizes, we can make sure
|
||||
* chunk size is a multiple of that sector size
|
||||
@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
|
||||
(unsigned long long)smallest->sectors);
|
||||
}
|
||||
|
||||
if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
|
||||
conf->layout = RAID0_ORIG_LAYOUT;
|
||||
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
|
||||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = mddev->layout;
|
||||
} else if (default_layout == RAID0_ORIG_LAYOUT ||
|
||||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
|
||||
conf->layout = default_layout;
|
||||
} else {
|
||||
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
|
||||
mdname(mddev));
|
||||
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
|
||||
err = -EOPNOTSUPP;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
pr_debug("md/raid0:%s: done.\n", mdname(mddev));
|
||||
*private_conf = conf;
|
||||
|
||||
|
@ -1641,30 +1641,39 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
|
||||
seq_printf(seq, "]");
|
||||
}
|
||||
|
||||
/**
|
||||
* raid1_error() - RAID1 error handler.
|
||||
* @mddev: affected md device.
|
||||
* @rdev: member device to fail.
|
||||
*
|
||||
* The routine acknowledges &rdev failure and determines new @mddev state.
|
||||
* If it failed, then:
|
||||
* - &MD_BROKEN flag is set in &mddev->flags.
|
||||
* - recovery is disabled.
|
||||
* Otherwise, it must be degraded:
|
||||
* - recovery is interrupted.
|
||||
* - &mddev->degraded is bumped.
|
||||
*
|
||||
* @rdev is marked as &Faulty, except when the array has failed and
|
||||
* &mddev->fail_last_dev is off.
|
||||
*/
|
||||
static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
char b[BDEVNAME_SIZE];
|
||||
struct r1conf *conf = mddev->private;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If it is not operational, then we have already marked it as dead
|
||||
* else if it is the last working disks with "fail_last_dev == false",
|
||||
* ignore the error, let the next level up know.
|
||||
* else mark the drive as failed
|
||||
*/
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
|
||||
&& (conf->raid_disks - mddev->degraded) == 1) {
|
||||
/*
|
||||
* Don't fail the drive, act as though we were just a
|
||||
* normal single drive.
|
||||
* However don't try a recovery from this drive as
|
||||
* it is very likely to fail.
|
||||
*/
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags) &&
|
||||
(conf->raid_disks - mddev->degraded) == 1) {
|
||||
set_bit(MD_BROKEN, &mddev->flags);
|
||||
|
||||
if (!mddev->fail_last_dev) {
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags))
|
||||
|
@ -1970,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
|
||||
_enough(conf, 1, ignore);
|
||||
}
|
||||
|
||||
/**
|
||||
* raid10_error() - RAID10 error handler.
|
||||
* @mddev: affected md device.
|
||||
* @rdev: member device to fail.
|
||||
*
|
||||
* The routine acknowledges &rdev failure and determines new @mddev state.
|
||||
* If it failed, then:
|
||||
* - &MD_BROKEN flag is set in &mddev->flags.
|
||||
* Otherwise, it must be degraded:
|
||||
* - recovery is interrupted.
|
||||
* - &mddev->degraded is bumped.
|
||||
|
||||
* @rdev is marked as &Faulty, except when the array has failed and
|
||||
* &mddev->fail_last_dev is off.
|
||||
*/
|
||||
static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
{
|
||||
char b[BDEVNAME_SIZE];
|
||||
struct r10conf *conf = mddev->private;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* If it is not operational, then we have already marked it as dead
|
||||
* else if it is the last working disks with "fail_last_dev == false",
|
||||
* ignore the error, let the next level up know.
|
||||
* else mark the drive as failed
|
||||
*/
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
|
||||
&& !enough(conf, rdev->raid_disk)) {
|
||||
/*
|
||||
* Don't fail the drive, just return an IO error.
|
||||
*/
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
|
||||
set_bit(MD_BROKEN, &mddev->flags);
|
||||
|
||||
if (!mddev->fail_last_dev) {
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags))
|
||||
mddev->degraded++;
|
||||
/*
|
||||
* If recovery is running, make sure it aborts.
|
||||
*/
|
||||
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
|
@ -883,7 +883,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
|
||||
(unsigned long long)r_sector, dd_idx,
|
||||
(unsigned long long)sector);
|
||||
|
||||
rdev = conf->disks[dd_idx].rdev;
|
||||
/* Array has not started so rcu dereference is safe */
|
||||
rdev = rcu_dereference_protected(
|
||||
conf->disks[dd_idx].rdev, 1);
|
||||
if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
|
||||
sector >= rdev->recovery_offset)) {
|
||||
pr_debug("%s:%*s data member disk %d missing\n",
|
||||
@ -934,7 +936,10 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
|
||||
parity_sector = raid5_compute_sector(conf, r_sector_first + i,
|
||||
0, &disk, &sh);
|
||||
BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
|
||||
parity_rdev = conf->disks[sh.pd_idx].rdev;
|
||||
|
||||
/* Array has not started so rcu dereference is safe */
|
||||
parity_rdev = rcu_dereference_protected(
|
||||
conf->disks[sh.pd_idx].rdev, 1);
|
||||
|
||||
BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
|
||||
pr_debug("%s:%*s write parity at sector %llu, disk %s\n",
|
||||
@ -1404,7 +1409,9 @@ int ppl_init_log(struct r5conf *conf)
|
||||
|
||||
for (i = 0; i < ppl_conf->count; i++) {
|
||||
struct ppl_log *log = &ppl_conf->child_logs[i];
|
||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||
/* Array has not started so rcu dereference is safe */
|
||||
struct md_rdev *rdev =
|
||||
rcu_dereference_protected(conf->disks[i].rdev, 1);
|
||||
|
||||
mutex_init(&log->io_mutex);
|
||||
spin_lock_init(&log->io_list_lock);
|
||||
|
@ -79,18 +79,21 @@ static inline int stripe_hash_locks_hash(struct r5conf *conf, sector_t sect)
|
||||
}
|
||||
|
||||
static inline void lock_device_hash_lock(struct r5conf *conf, int hash)
|
||||
__acquires(&conf->device_lock)
|
||||
{
|
||||
spin_lock_irq(conf->hash_locks + hash);
|
||||
spin_lock(&conf->device_lock);
|
||||
}
|
||||
|
||||
static inline void unlock_device_hash_lock(struct r5conf *conf, int hash)
|
||||
__releases(&conf->device_lock)
|
||||
{
|
||||
spin_unlock(&conf->device_lock);
|
||||
spin_unlock_irq(conf->hash_locks + hash);
|
||||
}
|
||||
|
||||
static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
|
||||
__acquires(&conf->device_lock)
|
||||
{
|
||||
int i;
|
||||
spin_lock_irq(conf->hash_locks);
|
||||
@ -100,6 +103,7 @@ static inline void lock_all_device_hash_locks_irq(struct r5conf *conf)
|
||||
}
|
||||
|
||||
static inline void unlock_all_device_hash_locks_irq(struct r5conf *conf)
|
||||
__releases(&conf->device_lock)
|
||||
{
|
||||
int i;
|
||||
spin_unlock(&conf->device_lock);
|
||||
@ -164,6 +168,7 @@ static bool stripe_is_lowprio(struct stripe_head *sh)
|
||||
}
|
||||
|
||||
static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
|
||||
__must_hold(&sh->raid_conf->device_lock)
|
||||
{
|
||||
struct r5conf *conf = sh->raid_conf;
|
||||
struct r5worker_group *group;
|
||||
@ -211,6 +216,7 @@ static void raid5_wakeup_stripe_thread(struct stripe_head *sh)
|
||||
|
||||
static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
int i;
|
||||
int injournal = 0; /* number of data pages with R5_InJournal */
|
||||
@ -296,6 +302,7 @@ static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
|
||||
static void __release_stripe(struct r5conf *conf, struct stripe_head *sh,
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
if (atomic_dec_and_test(&sh->count))
|
||||
do_release_stripe(conf, sh, temp_inactive_list);
|
||||
@ -350,9 +357,9 @@ static void release_inactive_stripe_list(struct r5conf *conf,
|
||||
}
|
||||
}
|
||||
|
||||
/* should hold conf->device_lock already */
|
||||
static int release_stripe_list(struct r5conf *conf,
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
struct stripe_head *sh, *t;
|
||||
int count = 0;
|
||||
@ -629,6 +636,10 @@ static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
|
||||
* This is because some failed devices may only affect one
|
||||
* of the two sections, and some non-in_sync devices may
|
||||
* be insync in the section most affected by failed devices.
|
||||
*
|
||||
* Most calls to this function hold &conf->device_lock. Calls
|
||||
* in raid5_run() do not require the lock as no other threads
|
||||
* have been started yet.
|
||||
*/
|
||||
int raid5_calc_degraded(struct r5conf *conf)
|
||||
{
|
||||
@ -686,17 +697,17 @@ int raid5_calc_degraded(struct r5conf *conf)
|
||||
return degraded;
|
||||
}
|
||||
|
||||
static int has_failed(struct r5conf *conf)
|
||||
static bool has_failed(struct r5conf *conf)
|
||||
{
|
||||
int degraded;
|
||||
int degraded = conf->mddev->degraded;
|
||||
|
||||
if (conf->mddev->reshape_position == MaxSector)
|
||||
return conf->mddev->degraded > conf->max_degraded;
|
||||
if (test_bit(MD_BROKEN, &conf->mddev->flags))
|
||||
return true;
|
||||
|
||||
degraded = raid5_calc_degraded(conf);
|
||||
if (degraded > conf->max_degraded)
|
||||
return 1;
|
||||
return 0;
|
||||
if (conf->mddev->reshape_position != MaxSector)
|
||||
degraded = raid5_calc_degraded(conf);
|
||||
|
||||
return degraded > conf->max_degraded;
|
||||
}
|
||||
|
||||
struct stripe_head *
|
||||
@ -2648,6 +2659,28 @@ static void shrink_stripes(struct r5conf *conf)
|
||||
conf->slab_cache = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* This helper wraps rcu_dereference_protected() and can be used when
|
||||
* it is known that the nr_pending of the rdev is elevated.
|
||||
*/
|
||||
static struct md_rdev *rdev_pend_deref(struct md_rdev __rcu *rdev)
|
||||
{
|
||||
return rcu_dereference_protected(rdev,
|
||||
atomic_read(&rcu_access_pointer(rdev)->nr_pending));
|
||||
}
|
||||
|
||||
/*
|
||||
* This helper wraps rcu_dereference_protected() and should be used
|
||||
* when it is known that the mddev_lock() is held. This is safe
|
||||
* seeing raid5_remove_disk() has the same lock held.
|
||||
*/
|
||||
static struct md_rdev *rdev_mdlock_deref(struct mddev *mddev,
|
||||
struct md_rdev __rcu *rdev)
|
||||
{
|
||||
return rcu_dereference_protected(rdev,
|
||||
lockdep_is_held(&mddev->reconfig_mutex));
|
||||
}
|
||||
|
||||
static void raid5_end_read_request(struct bio * bi)
|
||||
{
|
||||
struct stripe_head *sh = bi->bi_private;
|
||||
@ -2674,9 +2707,9 @@ static void raid5_end_read_request(struct bio * bi)
|
||||
* In that case it moved down to 'rdev'.
|
||||
* rdev is not removed until all requests are finished.
|
||||
*/
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
if (!rdev)
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
|
||||
if (use_new_offset(conf, sh))
|
||||
s = sh->sector + rdev->new_data_offset;
|
||||
@ -2790,11 +2823,11 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
|
||||
for (i = 0 ; i < disks; i++) {
|
||||
if (bi == &sh->dev[i].req) {
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
break;
|
||||
}
|
||||
if (bi == &sh->dev[i].rreq) {
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
if (rdev)
|
||||
replacement = 1;
|
||||
else
|
||||
@ -2802,7 +2835,7 @@ static void raid5_end_write_request(struct bio *bi)
|
||||
* replaced it. rdev is not removed
|
||||
* until all requests are finished.
|
||||
*/
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -2863,34 +2896,31 @@ static void raid5_error(struct mddev *mddev, struct md_rdev *rdev)
|
||||
unsigned long flags;
|
||||
pr_debug("raid456: error called\n");
|
||||
|
||||
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n",
|
||||
mdname(mddev), bdevname(rdev->bdev, b));
|
||||
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags) &&
|
||||
mddev->degraded == conf->max_degraded) {
|
||||
/*
|
||||
* Don't allow to achieve failed state
|
||||
* Don't try to recover this device
|
||||
*/
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
mddev->degraded = raid5_calc_degraded(conf);
|
||||
|
||||
if (has_failed(conf)) {
|
||||
set_bit(MD_BROKEN, &conf->mddev->flags);
|
||||
conf->recovery_disabled = mddev->recovery_disabled;
|
||||
|
||||
pr_crit("md/raid:%s: Cannot continue operation (%d/%d failed).\n",
|
||||
mdname(mddev), mddev->degraded, conf->raid_disks);
|
||||
} else {
|
||||
pr_crit("md/raid:%s: Operation continuing on %d devices.\n",
|
||||
mdname(mddev), conf->raid_disks - mddev->degraded);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
|
||||
set_bit(Blocked, &rdev->flags);
|
||||
set_mask_bits(&mddev->sb_flags, 0,
|
||||
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
|
||||
pr_crit("md/raid:%s: Disk failure on %s, disabling device.\n"
|
||||
"md/raid:%s: Operation continuing on %d devices.\n",
|
||||
mdname(mddev),
|
||||
bdevname(rdev->bdev, b),
|
||||
mdname(mddev),
|
||||
conf->raid_disks - mddev->degraded);
|
||||
r5c_update_on_rdev_error(mddev, rdev);
|
||||
}
|
||||
|
||||
@ -5213,23 +5243,23 @@ static void handle_stripe(struct stripe_head *sh)
|
||||
struct r5dev *dev = &sh->dev[i];
|
||||
if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
|
||||
/* We own a safe reference to the rdev */
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
if (!rdev_set_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0))
|
||||
md_error(conf->mddev, rdev);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev_clear_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
}
|
||||
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_pend_deref(conf->disks[i].replacement);
|
||||
if (!rdev)
|
||||
/* rdev have been moved down */
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_pend_deref(conf->disks[i].rdev);
|
||||
rdev_clear_badblocks(rdev, sh->sector,
|
||||
RAID5_STRIPE_SECTORS(conf), 0);
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
@ -5256,6 +5286,7 @@ static void handle_stripe(struct stripe_head *sh)
|
||||
}
|
||||
|
||||
static void raid5_activate_delayed(struct r5conf *conf)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) {
|
||||
while (!list_empty(&conf->delayed_list)) {
|
||||
@ -5273,9 +5304,9 @@ static void raid5_activate_delayed(struct r5conf *conf)
|
||||
}
|
||||
|
||||
static void activate_bit_delay(struct r5conf *conf,
|
||||
struct list_head *temp_inactive_list)
|
||||
struct list_head *temp_inactive_list)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
/* device_lock is held */
|
||||
struct list_head head;
|
||||
list_add(&head, &conf->bitmap_list);
|
||||
list_del_init(&conf->bitmap_list);
|
||||
@ -5500,6 +5531,7 @@ static struct bio *chunk_aligned_read(struct mddev *mddev, struct bio *raid_bio)
|
||||
* handle_list.
|
||||
*/
|
||||
static struct stripe_head *__get_priority_stripe(struct r5conf *conf, int group)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
struct stripe_head *sh, *tmp;
|
||||
struct list_head *handle_list = NULL;
|
||||
@ -6288,7 +6320,7 @@ static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_n
|
||||
*/
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
struct md_rdev *rdev = READ_ONCE(conf->disks[i].rdev);
|
||||
struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
|
||||
if (rdev == NULL || test_bit(Faulty, &rdev->flags))
|
||||
still_degraded = 1;
|
||||
@ -6371,8 +6403,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio,
|
||||
static int handle_active_stripes(struct r5conf *conf, int group,
|
||||
struct r5worker *worker,
|
||||
struct list_head *temp_inactive_list)
|
||||
__releases(&conf->device_lock)
|
||||
__acquires(&conf->device_lock)
|
||||
__must_hold(&conf->device_lock)
|
||||
{
|
||||
struct stripe_head *batch[MAX_STRIPE_BATCH], *sh;
|
||||
int i, batch_size = 0, hash;
|
||||
@ -7166,7 +7197,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
int i;
|
||||
int group_cnt;
|
||||
struct r5worker_group *new_group;
|
||||
int ret;
|
||||
int ret = -ENOMEM;
|
||||
|
||||
if (mddev->new_level != 5
|
||||
&& mddev->new_level != 4
|
||||
@ -7225,6 +7256,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
spin_lock_init(&conf->device_lock);
|
||||
seqcount_spinlock_init(&conf->gen_lock, &conf->device_lock);
|
||||
mutex_init(&conf->cache_size_mutex);
|
||||
|
||||
init_waitqueue_head(&conf->wait_for_quiescent);
|
||||
init_waitqueue_head(&conf->wait_for_stripe);
|
||||
init_waitqueue_head(&conf->wait_for_overlap);
|
||||
@ -7302,11 +7334,13 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
|
||||
conf->level = mddev->new_level;
|
||||
conf->chunk_sectors = mddev->new_chunk_sectors;
|
||||
if (raid5_alloc_percpu(conf) != 0)
|
||||
ret = raid5_alloc_percpu(conf);
|
||||
if (ret)
|
||||
goto abort;
|
||||
|
||||
pr_debug("raid456: run(%s) called.\n", mdname(mddev));
|
||||
|
||||
ret = -EIO;
|
||||
rdev_for_each(rdev, mddev) {
|
||||
raid_disk = rdev->raid_disk;
|
||||
if (raid_disk >= max_disks
|
||||
@ -7317,11 +7351,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (test_bit(Replacement, &rdev->flags)) {
|
||||
if (disk->replacement)
|
||||
goto abort;
|
||||
disk->replacement = rdev;
|
||||
RCU_INIT_POINTER(disk->replacement, rdev);
|
||||
} else {
|
||||
if (disk->rdev)
|
||||
goto abort;
|
||||
disk->rdev = rdev;
|
||||
RCU_INIT_POINTER(disk->rdev, rdev);
|
||||
}
|
||||
|
||||
if (test_bit(In_sync, &rdev->flags)) {
|
||||
@ -7370,6 +7404,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (grow_stripes(conf, conf->min_nr_stripes)) {
|
||||
pr_warn("md/raid:%s: couldn't allocate %dkB for buffers\n",
|
||||
mdname(mddev), memory);
|
||||
ret = -ENOMEM;
|
||||
goto abort;
|
||||
} else
|
||||
pr_debug("md/raid:%s: allocated %dkB\n", mdname(mddev), memory);
|
||||
@ -7383,7 +7418,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
conf->shrinker.count_objects = raid5_cache_count;
|
||||
conf->shrinker.batch = 128;
|
||||
conf->shrinker.flags = 0;
|
||||
if (register_shrinker(&conf->shrinker)) {
|
||||
ret = register_shrinker(&conf->shrinker);
|
||||
if (ret) {
|
||||
pr_warn("md/raid:%s: couldn't register shrinker.\n",
|
||||
mdname(mddev));
|
||||
goto abort;
|
||||
@ -7394,17 +7430,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
||||
if (!conf->thread) {
|
||||
pr_warn("md/raid:%s: couldn't allocate thread.\n",
|
||||
mdname(mddev));
|
||||
ret = -ENOMEM;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
return conf;
|
||||
|
||||
abort:
|
||||
if (conf) {
|
||||
if (conf)
|
||||
free_conf(conf);
|
||||
return ERR_PTR(-EIO);
|
||||
} else
|
||||
return ERR_PTR(-ENOMEM);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded)
|
||||
@ -7621,17 +7656,18 @@ static int raid5_run(struct mddev *mddev)
|
||||
|
||||
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
|
||||
i++) {
|
||||
rdev = conf->disks[i].rdev;
|
||||
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
|
||||
if (!rdev && conf->disks[i].replacement) {
|
||||
/* The replacement is all we have yet */
|
||||
rdev = conf->disks[i].replacement;
|
||||
rdev = rdev_mdlock_deref(mddev,
|
||||
conf->disks[i].replacement);
|
||||
conf->disks[i].replacement = NULL;
|
||||
clear_bit(Replacement, &rdev->flags);
|
||||
conf->disks[i].rdev = rdev;
|
||||
rcu_assign_pointer(conf->disks[i].rdev, rdev);
|
||||
}
|
||||
if (!rdev)
|
||||
continue;
|
||||
if (conf->disks[i].replacement &&
|
||||
if (rcu_access_pointer(conf->disks[i].replacement) &&
|
||||
conf->reshape_progress != MaxSector) {
|
||||
/* replacements and reshape simply do not mix. */
|
||||
pr_warn("md: cannot handle concurrent replacement and reshape.\n");
|
||||
@ -7749,7 +7785,6 @@ static int raid5_run(struct mddev *mddev)
|
||||
*/
|
||||
stripe = stripe * PAGE_SIZE;
|
||||
stripe = roundup_pow_of_two(stripe);
|
||||
mddev->queue->limits.discard_alignment = stripe;
|
||||
mddev->queue->limits.discard_granularity = stripe;
|
||||
|
||||
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
|
||||
@ -7828,8 +7863,8 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
|
||||
|
||||
static void print_raid5_conf (struct r5conf *conf)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
int i;
|
||||
struct disk_info *tmp;
|
||||
|
||||
pr_debug("RAID conf printout:\n");
|
||||
if (!conf) {
|
||||
@ -7840,50 +7875,54 @@ static void print_raid5_conf (struct r5conf *conf)
|
||||
conf->raid_disks,
|
||||
conf->raid_disks - conf->mddev->degraded);
|
||||
|
||||
rcu_read_lock();
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
char b[BDEVNAME_SIZE];
|
||||
tmp = conf->disks + i;
|
||||
if (tmp->rdev)
|
||||
rdev = rcu_dereference(conf->disks[i].rdev);
|
||||
if (rdev)
|
||||
pr_debug(" disk %d, o:%d, dev:%s\n",
|
||||
i, !test_bit(Faulty, &tmp->rdev->flags),
|
||||
bdevname(tmp->rdev->bdev, b));
|
||||
i, !test_bit(Faulty, &rdev->flags),
|
||||
bdevname(rdev->bdev, b));
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int raid5_spare_active(struct mddev *mddev)
|
||||
{
|
||||
int i;
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct disk_info *tmp;
|
||||
struct md_rdev *rdev, *replacement;
|
||||
int count = 0;
|
||||
unsigned long flags;
|
||||
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
tmp = conf->disks + i;
|
||||
if (tmp->replacement
|
||||
&& tmp->replacement->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &tmp->replacement->flags)
|
||||
&& !test_and_set_bit(In_sync, &tmp->replacement->flags)) {
|
||||
rdev = rdev_mdlock_deref(mddev, conf->disks[i].rdev);
|
||||
replacement = rdev_mdlock_deref(mddev,
|
||||
conf->disks[i].replacement);
|
||||
if (replacement
|
||||
&& replacement->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &replacement->flags)
|
||||
&& !test_and_set_bit(In_sync, &replacement->flags)) {
|
||||
/* Replacement has just become active. */
|
||||
if (!tmp->rdev
|
||||
|| !test_and_clear_bit(In_sync, &tmp->rdev->flags))
|
||||
if (!rdev
|
||||
|| !test_and_clear_bit(In_sync, &rdev->flags))
|
||||
count++;
|
||||
if (tmp->rdev) {
|
||||
if (rdev) {
|
||||
/* Replaced device not technically faulty,
|
||||
* but we need to be sure it gets removed
|
||||
* and never re-added.
|
||||
*/
|
||||
set_bit(Faulty, &tmp->rdev->flags);
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
sysfs_notify_dirent_safe(
|
||||
tmp->rdev->sysfs_state);
|
||||
rdev->sysfs_state);
|
||||
}
|
||||
sysfs_notify_dirent_safe(tmp->replacement->sysfs_state);
|
||||
} else if (tmp->rdev
|
||||
&& tmp->rdev->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &tmp->rdev->flags)
|
||||
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
|
||||
sysfs_notify_dirent_safe(replacement->sysfs_state);
|
||||
} else if (rdev
|
||||
&& rdev->recovery_offset == MaxSector
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_and_set_bit(In_sync, &rdev->flags)) {
|
||||
count++;
|
||||
sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
}
|
||||
}
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
@ -7898,8 +7937,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
struct r5conf *conf = mddev->private;
|
||||
int err = 0;
|
||||
int number = rdev->raid_disk;
|
||||
struct md_rdev **rdevp;
|
||||
struct md_rdev __rcu **rdevp;
|
||||
struct disk_info *p = conf->disks + number;
|
||||
struct md_rdev *tmp;
|
||||
|
||||
print_raid5_conf(conf);
|
||||
if (test_bit(Journal, &rdev->flags) && conf->log) {
|
||||
@ -7917,9 +7957,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
log_exit(conf);
|
||||
return 0;
|
||||
}
|
||||
if (rdev == p->rdev)
|
||||
if (rdev == rcu_access_pointer(p->rdev))
|
||||
rdevp = &p->rdev;
|
||||
else if (rdev == p->replacement)
|
||||
else if (rdev == rcu_access_pointer(p->replacement))
|
||||
rdevp = &p->replacement;
|
||||
else
|
||||
return 0;
|
||||
@ -7939,18 +7979,20 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
if (!test_bit(Faulty, &rdev->flags) &&
|
||||
mddev->recovery_disabled != conf->recovery_disabled &&
|
||||
!has_failed(conf) &&
|
||||
(!p->replacement || p->replacement == rdev) &&
|
||||
(!rcu_access_pointer(p->replacement) ||
|
||||
rcu_access_pointer(p->replacement) == rdev) &&
|
||||
number < conf->raid_disks) {
|
||||
err = -EBUSY;
|
||||
goto abort;
|
||||
}
|
||||
*rdevp = NULL;
|
||||
if (!test_bit(RemoveSynchronized, &rdev->flags)) {
|
||||
lockdep_assert_held(&mddev->reconfig_mutex);
|
||||
synchronize_rcu();
|
||||
if (atomic_read(&rdev->nr_pending)) {
|
||||
/* lost the race, try later */
|
||||
err = -EBUSY;
|
||||
*rdevp = rdev;
|
||||
rcu_assign_pointer(*rdevp, rdev);
|
||||
}
|
||||
}
|
||||
if (!err) {
|
||||
@ -7958,17 +8000,19 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
if (err)
|
||||
goto abort;
|
||||
}
|
||||
if (p->replacement) {
|
||||
|
||||
tmp = rcu_access_pointer(p->replacement);
|
||||
if (tmp) {
|
||||
/* We must have just cleared 'rdev' */
|
||||
p->rdev = p->replacement;
|
||||
clear_bit(Replacement, &p->replacement->flags);
|
||||
rcu_assign_pointer(p->rdev, tmp);
|
||||
clear_bit(Replacement, &tmp->flags);
|
||||
smp_mb(); /* Make sure other CPUs may see both as identical
|
||||
* but will never see neither - if they are careful
|
||||
*/
|
||||
p->replacement = NULL;
|
||||
rcu_assign_pointer(p->replacement, NULL);
|
||||
|
||||
if (!err)
|
||||
err = log_modify(conf, p->rdev, true);
|
||||
err = log_modify(conf, tmp, true);
|
||||
}
|
||||
|
||||
clear_bit(WantReplacement, &rdev->flags);
|
||||
@ -7984,6 +8028,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
int ret, err = -EEXIST;
|
||||
int disk;
|
||||
struct disk_info *p;
|
||||
struct md_rdev *tmp;
|
||||
int first = 0;
|
||||
int last = conf->raid_disks - 1;
|
||||
|
||||
@ -8041,7 +8086,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
||||
}
|
||||
for (disk = first; disk <= last; disk++) {
|
||||
p = conf->disks + disk;
|
||||
if (test_bit(WantReplacement, &p->rdev->flags) &&
|
||||
tmp = rdev_mdlock_deref(mddev, p->rdev);
|
||||
if (test_bit(WantReplacement, &tmp->flags) &&
|
||||
p->replacement == NULL) {
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
set_bit(Replacement, &rdev->flags);
|
||||
@ -8332,6 +8378,7 @@ static void end_reshape(struct r5conf *conf)
|
||||
static void raid5_finish_reshape(struct mddev *mddev)
|
||||
{
|
||||
struct r5conf *conf = mddev->private;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
|
||||
|
||||
@ -8343,10 +8390,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
|
||||
for (d = conf->raid_disks ;
|
||||
d < conf->raid_disks - mddev->delta_disks;
|
||||
d++) {
|
||||
struct md_rdev *rdev = conf->disks[d].rdev;
|
||||
rdev = rdev_mdlock_deref(mddev,
|
||||
conf->disks[d].rdev);
|
||||
if (rdev)
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
rdev = conf->disks[d].replacement;
|
||||
rdev = rdev_mdlock_deref(mddev,
|
||||
conf->disks[d].replacement);
|
||||
if (rdev)
|
||||
clear_bit(In_sync, &rdev->flags);
|
||||
}
|
||||
|
@ -473,7 +473,8 @@ enum {
|
||||
*/
|
||||
|
||||
struct disk_info {
|
||||
struct md_rdev *rdev, *replacement;
|
||||
struct md_rdev __rcu *rdev;
|
||||
struct md_rdev __rcu *replacement;
|
||||
struct page *extra_page; /* extra page to use in prexor */
|
||||
};
|
||||
|
||||
@ -560,6 +561,16 @@ struct r5pending_data {
|
||||
struct bio_list bios;
|
||||
};
|
||||
|
||||
/*
 * Per-CPU data for a raid5 configuration.  struct r5conf refers to it through
 * its "percpu" member, which is declared as a __percpu pointer to this type.
 */
struct raid5_percpu {
|
||||
struct page *spare_page; /* Used when checking P/Q in raid6 */
|
||||
void *scribble; /* space for constructing buffer
|
||||
* lists and performing address
|
||||
* conversions
|
||||
*/
|
||||
int scribble_obj_size; /* NOTE(review): presumably the size of one scribble object - confirm */
|
||||
local_lock_t lock; /* NOTE(review): presumably guards the per-CPU fields above - confirm */
|
||||
};
|
||||
|
||||
struct r5conf {
|
||||
struct hlist_head *stripe_hashtbl;
|
||||
/* only protect corresponding hash list and inactive_list */
|
||||
@ -635,15 +646,7 @@ struct r5conf {
|
||||
*/
|
||||
int recovery_disabled;
|
||||
/* per cpu variables */
|
||||
struct raid5_percpu {
|
||||
struct page *spare_page; /* Used when checking P/Q in raid6 */
|
||||
void *scribble; /* space for constructing buffer
|
||||
* lists and performing address
|
||||
* conversions
|
||||
*/
|
||||
int scribble_obj_size;
|
||||
local_lock_t lock;
|
||||
} __percpu *percpu;
|
||||
struct raid5_percpu __percpu *percpu;
|
||||
int scribble_disks;
|
||||
int scribble_sectors;
|
||||
struct hlist_node node;
|
||||
|
@ -4,7 +4,6 @@
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates
|
||||
*/
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include "nvme.h"
|
||||
|
||||
#ifdef CONFIG_NVME_VERBOSE_ERRORS
|
||||
@ -92,6 +91,7 @@ static const char * const nvme_statuses[] = {
|
||||
[NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
|
||||
[NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
|
||||
[NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
|
||||
[NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
|
||||
[NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
|
||||
[NVME_SC_LBA_RANGE] = "LBA Out of Range",
|
||||
[NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
|
||||
@ -155,10 +155,13 @@ static const char * const nvme_statuses[] = {
|
||||
[NVME_SC_COMPARE_FAILED] = "Compare Failure",
|
||||
[NVME_SC_ACCESS_DENIED] = "Access Denied",
|
||||
[NVME_SC_UNWRITTEN_BLOCK] = "Deallocated or Unwritten Logical Block",
|
||||
[NVME_SC_INTERNAL_PATH_ERROR] = "Internal Pathing Error",
|
||||
[NVME_SC_ANA_PERSISTENT_LOSS] = "Asymmetric Access Persistent Loss",
|
||||
[NVME_SC_ANA_INACCESSIBLE] = "Asymmetric Access Inaccessible",
|
||||
[NVME_SC_ANA_TRANSITION] = "Asymmetric Access Transition",
|
||||
[NVME_SC_CTRL_PATH_ERROR] = "Controller Pathing Error",
|
||||
[NVME_SC_HOST_PATH_ERROR] = "Host Pathing Error",
|
||||
[NVME_SC_HOST_ABORTED_CMD] = "Host Aborted Command",
|
||||
};
|
||||
|
||||
const unsigned char *nvme_get_error_status_str(u16 status)
|
||||
|
@ -1207,6 +1207,7 @@ static void nvme_keep_alive_work(struct work_struct *work)
|
||||
|
||||
rq->timeout = ctrl->kato * HZ;
|
||||
rq->end_io_data = ctrl;
|
||||
rq->rq_flags |= RQF_QUIET;
|
||||
blk_execute_rq_nowait(rq, false, nvme_keep_alive_end_io);
|
||||
}
|
||||
|
||||
@ -1426,6 +1427,32 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Issue an Identify admin command with CNS NVME_ID_CNS_NS_CS_INDEP for
 * namespace @nsid and return the resulting data structure through @id.
 *
 * A buffer of sizeof(struct nvme_id_ns_cs_indep) is allocated with
 * kmalloc(GFP_KERNEL) and stored in *@id before the command is submitted
 * synchronously on ctrl->admin_q.
 *
 * Returns 0 on success; the caller then owns *@id and must kfree() it.
 * Returns -ENOMEM if the allocation fails (*@id is NULL in that case).
 * Otherwise returns the non-zero result of nvme_submit_sync_cmd() after
 * logging a warning; the buffer has already been freed on that path and
 * *@id is left pointing at the freed memory, so the caller must not use or
 * free it.
 */
static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
|
||||
struct nvme_id_ns_cs_indep **id)
|
||||
{
|
||||
struct nvme_command c = {
|
||||
.identify.opcode = nvme_admin_identify,
|
||||
.identify.nsid = cpu_to_le32(nsid),
|
||||
.identify.cns = NVME_ID_CNS_NS_CS_INDEP,
|
||||
};
|
||||
int ret;
|
||||
|
||||
*id = kmalloc(sizeof(**id), GFP_KERNEL);
|
||||
if (!*id)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
|
||||
if (ret) {
|
||||
dev_warn(ctrl->device,
|
||||
"Identify namespace (CS independent) failed (%d)\n",
|
||||
ret);
|
||||
/* Release the buffer here so callers only free it on success. */
kfree(*id);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
|
||||
unsigned int dword11, void *buffer, size_t buflen, u32 *result)
|
||||
{
|
||||
@ -1628,13 +1655,15 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
|
||||
BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
|
||||
NVME_DSM_MAX_RANGES);
|
||||
|
||||
queue->limits.discard_alignment = 0;
|
||||
queue->limits.discard_granularity = size;
|
||||
|
||||
/* If discard is already enabled, don't reset queue limits */
|
||||
if (queue->limits.max_discard_sectors)
|
||||
return;
|
||||
|
||||
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns, UINT_MAX))
|
||||
ctrl->max_discard_sectors = nvme_lba_to_sect(ns, ctrl->dmrsl);
|
||||
|
||||
blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors);
|
||||
blk_queue_max_discard_segments(queue, ctrl->max_discard_segments);
|
||||
|
||||
@ -1771,7 +1800,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
|
||||
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
|
||||
}
|
||||
blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
|
||||
blk_queue_dma_alignment(q, 7);
|
||||
blk_queue_dma_alignment(q, 3);
|
||||
blk_queue_write_cache(q, vwc, vwc);
|
||||
}
|
||||
|
||||
@ -2100,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
|
||||
.pr_ops = &nvme_pr_ops,
|
||||
};
|
||||
|
||||
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
|
||||
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
|
||||
{
|
||||
unsigned long timeout =
|
||||
((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
|
||||
unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
|
||||
u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
|
||||
int ret;
|
||||
|
||||
@ -2116,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
|
||||
usleep_range(1000, 2000);
|
||||
if (fatal_signal_pending(current))
|
||||
return -EINTR;
|
||||
if (time_after(jiffies, timeout)) {
|
||||
if (time_after(jiffies, timeout_jiffies)) {
|
||||
dev_err(ctrl->device,
|
||||
"Device not ready; aborting %s, CSTS=0x%x\n",
|
||||
enabled ? "initialisation" : "reset", csts);
|
||||
@ -2147,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
|
||||
if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
|
||||
msleep(NVME_QUIRK_DELAY_AMOUNT);
|
||||
|
||||
return nvme_wait_ready(ctrl, ctrl->cap, false);
|
||||
return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
|
||||
|
||||
int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
unsigned dev_page_min;
|
||||
u32 timeout;
|
||||
int ret;
|
||||
|
||||
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
|
||||
@ -2174,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
|
||||
ctrl->ctrl_config = NVME_CC_CSS_CSI;
|
||||
else
|
||||
ctrl->ctrl_config = NVME_CC_CSS_NVM;
|
||||
|
||||
if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
|
||||
u32 crto;
|
||||
|
||||
ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
|
||||
if (ret) {
|
||||
dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
|
||||
ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
|
||||
ctrl->ctrl_config |= NVME_CC_CRIME;
|
||||
timeout = NVME_CRTO_CRIMT(crto);
|
||||
} else {
|
||||
timeout = NVME_CRTO_CRWMT(crto);
|
||||
}
|
||||
} else {
|
||||
timeout = NVME_CAP_TIMEOUT(ctrl->cap);
|
||||
}
|
||||
|
||||
ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
|
||||
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
|
||||
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
|
||||
@ -2182,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
|
||||
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
|
||||
if (ret)
|
||||
return ret;
|
||||
return nvme_wait_ready(ctrl, ctrl->cap, true);
|
||||
return nvme_wait_ready(ctrl, timeout, true);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
|
||||
|
||||
@ -2894,8 +2944,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
|
||||
|
||||
if (id->dmrl)
|
||||
ctrl->max_discard_segments = id->dmrl;
|
||||
if (id->dmrsl)
|
||||
ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl);
|
||||
ctrl->dmrsl = le32_to_cpu(id->dmrsl);
|
||||
if (id->wzsl)
|
||||
ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl);
|
||||
|
||||
@ -3080,10 +3129,6 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = nvme_init_non_mdts_limits(ctrl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = nvme_configure_apst(ctrl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
@ -4092,11 +4137,26 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
|
||||
static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
|
||||
{
|
||||
struct nvme_ns_ids ids = { };
|
||||
struct nvme_id_ns_cs_indep *id;
|
||||
struct nvme_ns *ns;
|
||||
bool ready = true;
|
||||
|
||||
if (nvme_identify_ns_descs(ctrl, nsid, &ids))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Check if the namespace is ready. If not ignore it, we will get an
|
||||
* AEN once it becomes ready and restart the scan.
|
||||
*/
|
||||
if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
|
||||
!nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
|
||||
ready = id->nstat & NVME_NSTAT_NRDY;
|
||||
kfree(id);
|
||||
}
|
||||
|
||||
if (!ready)
|
||||
return;
|
||||
|
||||
ns = nvme_find_get_ns(ctrl, nsid);
|
||||
if (ns) {
|
||||
nvme_validate_ns(ns, &ids);
|
||||
@ -4239,11 +4299,26 @@ static void nvme_scan_work(struct work_struct *work)
|
||||
{
|
||||
struct nvme_ctrl *ctrl =
|
||||
container_of(work, struct nvme_ctrl, scan_work);
|
||||
int ret;
|
||||
|
||||
/* No tagset on a live ctrl means IO queues could not be created */
|
||||
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Identify controller limits can change at controller reset due to
|
||||
* new firmware download, even though it is not common we cannot ignore
|
||||
* such scenario. Controller's non-mdts limits are reported in the unit
|
||||
* of logical blocks that is dependent on the format of attached
|
||||
* namespace. Hence re-read the limits at the time of ns allocation.
|
||||
*/
|
||||
ret = nvme_init_non_mdts_limits(ctrl);
|
||||
if (ret < 0) {
|
||||
dev_warn(ctrl->device,
|
||||
"reading non-mdts-limits failed: %d\n", ret);
|
||||
return;
|
||||
}
|
||||
|
||||
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
|
||||
dev_info(ctrl->device, "rescanning namespaces.\n");
|
||||
nvme_clear_changed_ns_log(ctrl);
|
||||
@ -4841,6 +4916,8 @@ static inline void _nvme_check_size(void)
|
||||
BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
|
||||
NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
|
||||
|
@ -187,6 +187,14 @@ static inline char *nvmf_ctrl_subsysnqn(struct nvme_ctrl *ctrl)
|
||||
return ctrl->subsys->subnqn;
|
||||
}
|
||||
|
||||
/*
 * Complete @rq with status NVME_SC_HOST_ABORTED_CMD, but only if the request
 * has been started and has not already been completed; otherwise do nothing.
 *
 * Called by nvme_rdma_complete_timed_out() and nvme_tcp_complete_timed_out()
 * after they have stopped the queue the request belongs to.
 */
static inline void nvmf_complete_timed_out_request(struct request *rq)
|
||||
{
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
}
|
||||
|
||||
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
|
||||
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
|
||||
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
|
||||
|
@ -284,6 +284,7 @@ struct nvme_ctrl {
|
||||
#endif
|
||||
u16 crdt[3];
|
||||
u16 oncs;
|
||||
u32 dmrsl;
|
||||
u16 oacs;
|
||||
u16 sqsize;
|
||||
u32 max_namespaces;
|
||||
|
@ -1439,6 +1439,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
|
||||
nvme_init_request(abort_req, &cmd);
|
||||
|
||||
abort_req->end_io_data = NULL;
|
||||
abort_req->rq_flags |= RQF_QUIET;
|
||||
blk_execute_rq_nowait(abort_req, false, abort_endio);
|
||||
|
||||
/*
|
||||
@ -1775,6 +1776,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
|
||||
dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
|
||||
if (IS_ERR(dev->ctrl.admin_q)) {
|
||||
blk_mq_free_tag_set(&dev->admin_tagset);
|
||||
dev->ctrl.admin_q = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!blk_get_queue(dev->ctrl.admin_q)) {
|
||||
@ -2486,6 +2488,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
|
||||
req->end_io_data = nvmeq;
|
||||
|
||||
init_completion(&nvmeq->delete_done);
|
||||
req->rq_flags |= RQF_QUIET;
|
||||
blk_execute_rq_nowait(req, false, opcode == nvme_admin_delete_cq ?
|
||||
nvme_del_cq_end : nvme_del_queue_end);
|
||||
return 0;
|
||||
@ -2675,7 +2678,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
|
||||
struct pci_dev *pdev = to_pci_dev(dev->dev);
|
||||
|
||||
mutex_lock(&dev->shutdown_lock);
|
||||
if (pci_is_enabled(pdev)) {
|
||||
if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
|
||||
u32 csts = readl(dev->bar + NVME_REG_CSTS);
|
||||
|
||||
if (dev->ctrl.state == NVME_CTRL_LIVE ||
|
||||
|
@ -2010,10 +2010,7 @@ static void nvme_rdma_complete_timed_out(struct request *rq)
|
||||
struct nvme_rdma_queue *queue = req->queue;
|
||||
|
||||
nvme_rdma_stop_queue(queue);
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
nvmf_complete_timed_out_request(rq);
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return
|
||||
|
@ -2318,10 +2318,7 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
|
||||
struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
|
||||
|
||||
nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
|
||||
if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) {
|
||||
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
|
||||
blk_mq_complete_request(rq);
|
||||
}
|
||||
nvmf_complete_timed_out_request(rq);
|
||||
}
|
||||
|
||||
static enum blk_eh_timer_return
|
||||
|
@ -782,7 +782,6 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
|
||||
blk_queue_segment_boundary(q, PAGE_SIZE - 1);
|
||||
|
||||
q->limits.discard_granularity = logical_block_size;
|
||||
q->limits.discard_alignment = PAGE_SIZE;
|
||||
|
||||
/* Calculate max_discard_sectors and make it PAGE aligned */
|
||||
max_bytes = USHRT_MAX * logical_block_size;
|
||||
|
@ -44,7 +44,7 @@ struct block_device {
|
||||
unsigned long bd_stamp;
|
||||
bool bd_read_only; /* read-only policy */
|
||||
dev_t bd_dev;
|
||||
int bd_openers;
|
||||
atomic_t bd_openers;
|
||||
struct inode * bd_inode; /* will die */
|
||||
struct super_block * bd_super;
|
||||
void * bd_claiming;
|
||||
|
@ -176,6 +176,21 @@ static inline bool disk_live(struct gendisk *disk)
|
||||
return !inode_unhashed(disk->part0->bd_inode);
|
||||
}
|
||||
|
||||
/**
|
||||
* disk_openers - return the number of openers of a disk
|
||||
* @disk: disk to check
|
||||
*
|
||||
* This returns the number of openers for a disk. Note that this value is only
|
||||
* stable if disk->open_mutex is held.
|
||||
*
|
||||
* Note: Due to a quirk in the block layer open code, each open partition is
|
||||
* only counted once even if there are multiple openers.
 *
 * Return: the current value of disk->part0->bd_openers, read with
 * atomic_read().
|
||||
*/
|
||||
static inline unsigned int disk_openers(struct gendisk *disk)
|
||||
{
|
||||
return atomic_read(&disk->part0->bd_openers);
|
||||
}
|
||||
|
||||
/*
|
||||
* The gendisk is refcounted by the part0 block_device, and the bd_device
|
||||
* therein is also used for device model presentation in sysfs.
|
||||
|
@ -137,6 +137,7 @@ enum {
|
||||
NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory
|
||||
* Space Control
|
||||
*/
|
||||
NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */
|
||||
NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
|
||||
NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
|
||||
NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
|
||||
@ -161,6 +162,9 @@ enum {
|
||||
#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
|
||||
#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
|
||||
|
||||
/*
 * Field extractors for the 32-bit value read from NVME_REG_CRTO: CRIMT is the
 * upper 16 bits and CRWMT is the lower 16 bits.  nvme_enable_ctrl() passes the
 * selected field to nvme_wait_ready() as the ready timeout.
 */
#define NVME_CRTO_CRIMT(crto) ((crto) >> 16)
|
||||
#define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff)
|
||||
|
||||
enum {
|
||||
NVME_CMBSZ_SQS = 1 << 0,
|
||||
NVME_CMBSZ_CQS = 1 << 1,
|
||||
@ -204,8 +208,10 @@ enum {
|
||||
NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
|
||||
NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
|
||||
NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
|
||||
NVME_CAP_CSS_NVM = 1 << 0,
|
||||
NVME_CAP_CSS_CSI = 1 << 6,
|
||||
NVME_CC_CRIME = 1 << 24,
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_CSTS_RDY = 1 << 0,
|
||||
NVME_CSTS_CFS = 1 << 1,
|
||||
NVME_CSTS_NSSRO = 1 << 4,
|
||||
@ -214,10 +220,23 @@ enum {
|
||||
NVME_CSTS_SHST_OCCUR = 1 << 2,
|
||||
NVME_CSTS_SHST_CMPLT = 2 << 2,
|
||||
NVME_CSTS_SHST_MASK = 3 << 2,
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_CMBMSC_CRE = 1 << 0,
|
||||
NVME_CMBMSC_CMSE = 1 << 1,
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_CAP_CSS_NVM = 1 << 0,
|
||||
NVME_CAP_CSS_CSI = 1 << 6,
|
||||
};
|
||||
|
||||
/*
 * Bits 59 and 60 of the 64-bit controller capabilities value (ctrl->cap).
 *
 * nvme_enable_ctrl() reads NVME_REG_CRTO only when CRWMS is set; if CRIMS is
 * set as well it also sets NVME_CC_CRIME in the controller configuration and
 * uses the CRIMT timeout, otherwise it uses the CRWMT timeout.
 * nvme_validate_or_alloc_ns() checks namespace readiness only when CRIMS is
 * set.
 */
enum {
|
||||
NVME_CAP_CRMS_CRIMS = 1ULL << 59,
|
||||
NVME_CAP_CRMS_CRWMS = 1ULL << 60,
|
||||
};
|
||||
|
||||
struct nvme_id_power_state {
|
||||
__le16 max_power; /* centiwatts */
|
||||
__u8 rsvd2;
|
||||
@ -405,6 +424,21 @@ struct nvme_id_ns {
|
||||
__u8 vs[3712];
|
||||
};
|
||||
|
||||
/*
 * I/O Command Set Independent Identify Namespace Data Structure
 *
 * Filled in by an Identify command with CNS NVME_ID_CNS_NS_CS_INDEP (see
 * nvme_identify_ns_cs_indep()).  The named fields occupy bytes 0-14 and
 * rsvd15 pads the structure to 4096 bytes; _nvme_check_size() asserts at
 * build time that sizeof() equals NVME_IDENTIFY_DATA_SIZE.
 */
|
||||
struct nvme_id_ns_cs_indep {
|
||||
__u8 nsfeat;
|
||||
__u8 nmic;
|
||||
__u8 rescap;
|
||||
__u8 fpi;
|
||||
__le32 anagrpid;
|
||||
__u8 nsattr;
|
||||
__u8 rsvd9;
|
||||
__le16 nvmsetid;
|
||||
__le16 endgid;
|
||||
__u8 nstat; /* tested against NVME_NSTAT_NRDY by nvme_validate_or_alloc_ns() */
|
||||
__u8 rsvd15[4081]; /* bytes 15-4095 */
|
||||
};
|
||||
|
||||
struct nvme_zns_lbafe {
|
||||
__le64 zsze;
|
||||
__u8 zdes;
|
||||
@ -469,6 +503,7 @@ enum {
|
||||
NVME_ID_CNS_NS_DESC_LIST = 0x03,
|
||||
NVME_ID_CNS_CS_NS = 0x05,
|
||||
NVME_ID_CNS_CS_CTRL = 0x06,
|
||||
NVME_ID_CNS_NS_CS_INDEP = 0x08,
|
||||
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
|
||||
NVME_ID_CNS_NS_PRESENT = 0x11,
|
||||
NVME_ID_CNS_CTRL_NS_LIST = 0x12,
|
||||
@ -522,6 +557,10 @@ enum {
|
||||
NVME_NS_DPS_PI_TYPE3 = 3,
|
||||
};
|
||||
|
||||
/*
 * Bits of the nstat byte in struct nvme_id_ns_cs_indep.
 * nvme_validate_or_alloc_ns() treats a namespace as ready only when
 * NVME_NSTAT_NRDY is set.
 */
enum {
|
||||
NVME_NSTAT_NRDY = 1 << 0,
|
||||
};
|
||||
|
||||
enum {
|
||||
NVME_NVM_NS_16B_GUARD = 0,
|
||||
NVME_NVM_NS_32B_GUARD = 1,
|
||||
@ -1583,6 +1622,7 @@ enum {
|
||||
NVME_SC_NS_WRITE_PROTECTED = 0x20,
|
||||
NVME_SC_CMD_INTERRUPTED = 0x21,
|
||||
NVME_SC_TRANSIENT_TR_ERR = 0x22,
|
||||
NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24,
|
||||
NVME_SC_INVALID_IO_CMD_SET = 0x2C,
|
||||
|
||||
NVME_SC_LBA_RANGE = 0x80,
|
||||
@ -1679,9 +1719,11 @@ enum {
|
||||
/*
|
||||
* Path-related Errors:
|
||||
*/
|
||||
NVME_SC_INTERNAL_PATH_ERROR = 0x300,
|
||||
NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
|
||||
NVME_SC_ANA_INACCESSIBLE = 0x302,
|
||||
NVME_SC_ANA_TRANSITION = 0x303,
|
||||
NVME_SC_CTRL_PATH_ERROR = 0x360,
|
||||
NVME_SC_HOST_PATH_ERROR = 0x370,
|
||||
NVME_SC_HOST_ABORTED_CMD = 0x371,
|
||||
|
||||
|
@ -1,11 +1,6 @@
|
||||
/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
|
||||
/*
|
||||
* include/linux/loop.h
|
||||
*
|
||||
* Written by Theodore Ts'o, 3/29/93.
|
||||
*
|
||||
* Copyright 1993 by Theodore Ts'o. Redistribution of this file is
|
||||
* permitted under the GNU General Public License.
|
||||
* Copyright 1993 by Theodore Ts'o.
|
||||
*/
|
||||
#ifndef _UAPI_LINUX_LOOP_H
|
||||
#define _UAPI_LINUX_LOOP_H
|
||||
|
Loading…
Reference in New Issue
Block a user