nvmet: support reservation feature

This patch implements the reservation feature, including:
  1. reservation register (register, unregister and replace).
  2. reservation acquire (acquire, preempt, and preempt and abort).
  3. reservation release (release and clear).
  4. reservation report.
  5. set feature and get feature of the reservation notification mask.
  6. get log page of reservation events.

Not supported:
  1. persistent reservations across power loss.

Test cases:
  Use nvme-cli and fio to test all implemented sub-features:
  1. use nvme resv-register to register the host as a registrant,
     unregister it, or replace its key with a new one.
  2. use nvme resv-acquire to make the host the reservation holder,
     and use fio to send read and write I/O under every reservation
     type. Also test preempt and "preempt and abort".
  3. use nvme resv-report to show all registrants and the
     reservation status.
  4. use nvme resv-release to release all registrants.
  5. use nvme get-log to fetch the events generated by the preceding
     operations.

In addition, make the reservation feature configurable: one can enable
reservation support on a namespace before enabling the namespace
itself. The default of resv_enable is false.

Signed-off-by: Guixin Liu <kanie@linux.alibaba.com>
Reviewed-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
commit 5a47c2080a
parent 1900e1a449
Guixin Liu, 2024-11-06 15:34:46 +08:00, committed by Keith Busch
8 changed files with 1329 additions and 13 deletions

drivers/nvme/target/Makefile

@@ -10,7 +10,7 @@ obj-$(CONFIG_NVME_TARGET_FCLOOP)	+= nvme-fcloop.o
 obj-$(CONFIG_NVME_TARGET_TCP)		+= nvmet-tcp.o
 
 nvmet-y		+= core.o configfs.o admin-cmd.o fabrics-cmd.o \
-			discovery.o io-cmd-file.o io-cmd-bdev.o
+			discovery.o io-cmd-file.o io-cmd-bdev.o pr.o
 nvmet-$(CONFIG_NVME_TARGET_DEBUGFS)	+= debugfs.o
 nvmet-$(CONFIG_NVME_TARGET_PASSTHRU)	+= passthru.o
 nvmet-$(CONFIG_BLK_DEV_ZONED)		+= zns.o

drivers/nvme/target/admin-cmd.c

@@ -176,6 +176,10 @@ static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
 	log->iocs[nvme_cmd_read] =
 	log->iocs[nvme_cmd_flush] =
 	log->iocs[nvme_cmd_dsm] =
+	log->iocs[nvme_cmd_resv_acquire] =
+	log->iocs[nvme_cmd_resv_register] =
+	log->iocs[nvme_cmd_resv_release] =
+	log->iocs[nvme_cmd_resv_report] =
 		cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
 	log->iocs[nvme_cmd_write] =
 	log->iocs[nvme_cmd_write_zeroes] =
@@ -340,6 +344,8 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 		return nvmet_execute_get_log_cmd_effects_ns(req);
 	case NVME_LOG_ANA:
 		return nvmet_execute_get_log_page_ana(req);
+	case NVME_LOG_RESERVATION:
+		return nvmet_execute_get_log_page_resv(req);
 	}
 	pr_debug("unhandled lid %d on qid %d\n",
 			req->cmd->get_log_page.lid, req->sq->qid);
@@ -433,7 +439,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
-			NVME_CTRL_ONCS_WRITE_ZEROES);
+			NVME_CTRL_ONCS_WRITE_ZEROES |
+			NVME_CTRL_ONCS_RESERVATIONS);
 
 	/* XXX: don't report vwc if the underlying device is write through */
 	id->vwc = NVME_CTRL_VWC_PRESENT;
@@ -551,6 +558,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
 	id->nmic = NVME_NS_NMIC_SHARED;
 	id->anagrpid = cpu_to_le32(req->ns->anagrpid);
 
+	if (req->ns->pr.enable)
+		id->rescap = NVME_PR_SUPPORT_WRITE_EXCLUSIVE |
+			NVME_PR_SUPPORT_EXCLUSIVE_ACCESS |
+			NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY |
+			NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY |
+			NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS |
+			NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS |
+			NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF;
+
 	memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid));
 
 	id->lbaf[0].ds = req->ns->blksize_shift;
@@ -861,6 +877,9 @@ void nvmet_execute_set_features(struct nvmet_req *req)
 	case NVME_FEAT_WRITE_PROTECT:
 		status = nvmet_set_feat_write_protect(req);
 		break;
+	case NVME_FEAT_RESV_MASK:
+		status = nvmet_set_feat_resv_notif_mask(req, cdw11);
+		break;
 	default:
 		req->error_loc = offsetof(struct nvme_common_command, cdw10);
 		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
@@ -959,6 +978,9 @@ void nvmet_execute_get_features(struct nvmet_req *req)
 	case NVME_FEAT_WRITE_PROTECT:
 		status = nvmet_get_feat_write_protect(req);
 		break;
+	case NVME_FEAT_RESV_MASK:
+		status = nvmet_get_feat_resv_notif_mask(req);
+		break;
 	default:
 		req->error_loc =
 			offsetof(struct nvme_common_command, cdw10);
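
The pr.c file implementing these two feature handlers is suppressed further
down, so here is a minimal illustrative sketch, not the committed code, of
what the Get Features path for the reservation notification mask plausibly
does: resolve the namespace from the command's NSID and return the mask in
CQE dword 0, mirroring the other feature handlers in this file.
nvmet_req_find_ns() and nvmet_set_result() are existing nvmet helpers; the
body and the error choices are assumptions.

/*
 * Hedged sketch only -- the real nvmet_get_feat_resv_notif_mask()
 * lives in pr.c (diff suppressed below). Assumes nvmet.h context.
 */
u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req)
{
	u16 status;

	/* resolves req->ns from req->cmd->common.nsid, or fails */
	status = nvmet_req_find_ns(req);
	if (status)
		return status;

	if (!req->ns->pr.enable)
		return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;

	/* report the per-namespace mask in CQE dword 0 */
	nvmet_set_result(req, (u32)req->ns->pr.notify_mask);
	return NVME_SC_SUCCESS;
}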

drivers/nvme/target/configfs.c

@@ -769,6 +769,32 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
 CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
 
+static ssize_t nvmet_ns_resv_enable_show(struct config_item *item, char *page)
+{
+	return sysfs_emit(page, "%d\n", to_nvmet_ns(item)->pr.enable);
+}
+
+static ssize_t nvmet_ns_resv_enable_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+	bool val;
+
+	if (kstrtobool(page, &val))
+		return -EINVAL;
+
+	mutex_lock(&ns->subsys->lock);
+	if (ns->enabled) {
+		pr_err("the ns:%d is already enabled.\n", ns->nsid);
+		mutex_unlock(&ns->subsys->lock);
+		return -EINVAL;
+	}
+	ns->pr.enable = val;
+	mutex_unlock(&ns->subsys->lock);
+	return count;
+}
+CONFIGFS_ATTR(nvmet_ns_, resv_enable);
+
 static struct configfs_attribute *nvmet_ns_attrs[] = {
 	&nvmet_ns_attr_device_path,
 	&nvmet_ns_attr_device_nguid,
@@ -777,6 +803,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
 	&nvmet_ns_attr_enable,
 	&nvmet_ns_attr_buffered_io,
 	&nvmet_ns_attr_revalidate_size,
+	&nvmet_ns_attr_resv_enable,
 #ifdef CONFIG_PCI_P2PDMA
 	&nvmet_ns_attr_p2pmem,
 #endif

drivers/nvme/target/core.c

@@ -611,6 +611,12 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 	if (ret)
 		goto out_restore_subsys_maxnsid;
 
+	if (ns->pr.enable) {
+		ret = nvmet_pr_init_ns(ns);
+		if (ret)
+			goto out_remove_from_subsys;
+	}
+
 	subsys->nr_namespaces++;
 
 	nvmet_ns_changed(subsys, ns->nsid);
@@ -620,6 +626,8 @@ out_unlock:
 	mutex_unlock(&subsys->lock);
 	return ret;
+out_remove_from_subsys:
+	xa_erase(&subsys->namespaces, ns->nsid);
 out_restore_subsys_maxnsid:
 	subsys->max_nsid = nvmet_max_nsid(subsys);
 	percpu_ref_exit(&ns->ref);
@@ -663,6 +671,9 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
 	wait_for_completion(&ns->disable_done);
 	percpu_ref_exit(&ns->ref);
 
+	if (ns->pr.enable)
+		nvmet_pr_exit_ns(ns);
+
 	mutex_lock(&subsys->lock);
 
 	subsys->nr_namespaces--;
@@ -754,6 +765,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 {
 	struct nvmet_ns *ns = req->ns;
+	struct nvmet_pr_per_ctrl_ref *pc_ref = req->pc_ref;
 
 	if (!req->sq->sqhd_disabled)
 		nvmet_update_sq_head(req);
@@ -766,6 +778,9 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 	trace_nvmet_req_complete(req);
 
 	req->ops->queue_response(req);
+
+	if (pc_ref)
+		nvmet_pr_put_ns_pc_ref(pc_ref);
 	if (ns)
 		nvmet_put_namespace(ns);
 }
@@ -929,18 +944,39 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 		return ret;
 	}
 
+	if (req->ns->pr.enable) {
+		ret = nvmet_parse_pr_cmd(req);
+		if (!ret)
+			return ret;
+	}
+
 	switch (req->ns->csi) {
 	case NVME_CSI_NVM:
 		if (req->ns->file)
-			return nvmet_file_parse_io_cmd(req);
-		return nvmet_bdev_parse_io_cmd(req);
+			ret = nvmet_file_parse_io_cmd(req);
+		else
+			ret = nvmet_bdev_parse_io_cmd(req);
+		break;
 	case NVME_CSI_ZNS:
 		if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
-			return nvmet_bdev_zns_parse_io_cmd(req);
-		return NVME_SC_INVALID_IO_CMD_SET;
+			ret = nvmet_bdev_zns_parse_io_cmd(req);
+		else
+			ret = NVME_SC_INVALID_IO_CMD_SET;
+		break;
 	default:
-		return NVME_SC_INVALID_IO_CMD_SET;
+		ret = NVME_SC_INVALID_IO_CMD_SET;
+	}
+
+	if (ret)
+		return ret;
+
+	if (req->ns->pr.enable) {
+		ret = nvmet_pr_check_cmd_access(req);
+		if (ret)
+			return ret;
+
+		ret = nvmet_pr_get_ns_pc_ref(req);
+	}
+	return ret;
 }
@@ -964,6 +1000,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 	req->ns = NULL;
 	req->error_loc = NVMET_NO_ERROR_LOC;
 	req->error_slba = 0;
+	req->pc_ref = NULL;
 
 	/* no support for fused commands yet */
 	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
@@ -1015,6 +1052,8 @@ EXPORT_SYMBOL_GPL(nvmet_req_init);
 void nvmet_req_uninit(struct nvmet_req *req)
 {
 	percpu_ref_put(&req->sq->ref);
+	if (req->pc_ref)
+		nvmet_pr_put_ns_pc_ref(req->pc_ref);
 	if (req->ns)
 		nvmet_put_namespace(req->ns);
 }
@@ -1383,7 +1422,8 @@ static void nvmet_fatal_error_handler(struct work_struct *work)
 }
 
 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
-		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
+		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
+		uuid_t *hostid)
 {
 	struct nvmet_subsys *subsys;
 	struct nvmet_ctrl *ctrl;
@@ -1462,6 +1502,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	}
 	ctrl->cntlid = ret;
 
+	uuid_copy(&ctrl->hostid, hostid);
+
 	/*
 	 * Discovery controllers may use some arbitrary high value
 	 * in order to cleanup stale discovery sessions
@@ -1478,6 +1520,9 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 		nvmet_start_keep_alive_timer(ctrl);
 
 	mutex_lock(&subsys->lock);
+	ret = nvmet_ctrl_init_pr(ctrl);
+	if (ret)
+		goto init_pr_fail;
 	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
 	nvmet_setup_p2p_ns_map(ctrl, req);
 	nvmet_debugfs_ctrl_setup(ctrl);
@@ -1486,6 +1531,10 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	*ctrlp = ctrl;
 	return 0;
 
+init_pr_fail:
+	mutex_unlock(&subsys->lock);
+	nvmet_stop_keep_alive_timer(ctrl);
+	ida_free(&cntlid_ida, ctrl->cntlid);
 out_free_sqs:
 	kfree(ctrl->sqs);
 out_free_changed_ns_list:
@@ -1504,6 +1553,7 @@ static void nvmet_ctrl_free(struct kref *ref)
 	struct nvmet_subsys *subsys = ctrl->subsys;
 
 	mutex_lock(&subsys->lock);
+	nvmet_ctrl_destroy_pr(ctrl);
 	nvmet_release_p2p_ns_map(ctrl);
 	list_del(&ctrl->subsys_entry);
 	mutex_unlock(&subsys->lock);
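
The per-controller reference taken at the end of nvmet_parse_io_cmd() above
pairs with the puts in __nvmet_req_complete() and nvmet_req_uninit(). Below
is a minimal sketch of what the lookup side might look like, assuming
pr_per_ctrl_refs is an xarray indexed by controller ID; the real
nvmet_pr_get_ns_pc_ref() lives in the suppressed pr.c.

/*
 * Sketch under stated assumptions: a live reference must be held for
 * the whole I/O so that "preempt and abort" can wait for in-flight
 * commands from a preempted controller to drain.
 */
u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref;

	pc_ref = xa_load(&req->ns->pr_per_ctrl_refs,
			 req->sq->ctrl->cntlid);
	if (unlikely(!percpu_ref_tryget_live(&pc_ref->ref)))
		return NVME_SC_INTERNAL;
	req->pc_ref = pc_ref;
	return NVME_SC_SUCCESS;
}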

drivers/nvme/target/fabrics-cmd.c

@@ -245,12 +245,10 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 	d->subsysnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
 	d->hostnqn[NVMF_NQN_FIELD_LEN - 1] = '\0';
 	status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req,
-				  le32_to_cpu(c->kato), &ctrl);
+				  le32_to_cpu(c->kato), &ctrl, &d->hostid);
 	if (status)
 		goto out;
 
-	uuid_copy(&ctrl->hostid, &d->hostid);
-
 	dhchap_status = nvmet_setup_auth(ctrl);
 	if (dhchap_status) {
 		pr_err("Failed to setup authentication, dhchap status %u\n",

drivers/nvme/target/nvmet.h

@@ -20,6 +20,7 @@
 #include <linux/blkdev.h>
 #include <linux/radix-tree.h>
 #include <linux/t10-pi.h>
+#include <linux/kfifo.h>
 
 #define NVMET_DEFAULT_VS	NVME_VS(1, 3, 0)
 
@@ -30,6 +31,7 @@
 #define NVMET_MN_MAX_SIZE	40
 #define NVMET_SN_MAX_SIZE	20
 #define NVMET_FR_MAX_SIZE	8
+#define NVMET_PR_LOG_QUEUE_SIZE	64
 
 /*
  * Supported optional AENs:
@@ -56,6 +58,38 @@
 #define IPO_IATTR_CONNECT_SQE(x)	\
 	(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
 
+struct nvmet_pr_registrant {
+	u64			rkey;
+	uuid_t			hostid;
+	enum nvme_pr_type	rtype;
+	struct list_head	entry;
+	struct rcu_head		rcu;
+};
+
+struct nvmet_pr {
+	bool			enable;
+	unsigned long		notify_mask;
+	atomic_t		generation;
+	struct nvmet_pr_registrant __rcu *holder;
+	/*
+	 * During the execution of the reservation command, mutual
+	 * exclusion is required throughout the process. However,
+	 * while waiting asynchronously for the 'per controller
+	 * percpu_ref' to complete before the 'preempt and abort'
+	 * command finishes, a semaphore is needed to ensure mutual
+	 * exclusion instead of a mutex.
+	 */
+	struct semaphore	pr_sem;
+	struct list_head	registrant_list;
+};
+
+struct nvmet_pr_per_ctrl_ref {
+	struct percpu_ref	ref;
+	struct completion	free_done;
+	struct completion	confirm_done;
+	uuid_t			hostid;
+};
+
 struct nvmet_ns {
 	struct percpu_ref	ref;
 	struct file		*bdev_file;
@@ -85,6 +119,8 @@ struct nvmet_ns {
 	int			pi_type;
 	int			metadata_size;
 	u8			csi;
+	struct nvmet_pr		pr;
+	struct xarray		pr_per_ctrl_refs;
 };
 
 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -191,6 +227,13 @@ static inline bool nvmet_port_secure_channel_required(struct nvmet_port *port)
 	return nvmet_port_disc_addr_treq_secure_channel(port) == NVMF_TREQ_REQUIRED;
 }
 
+struct nvmet_pr_log_mgr {
+	struct mutex		lock;
+	u64			lost_count;
+	u64			counter;
+	DECLARE_KFIFO(log_queue, struct nvme_pr_log, NVMET_PR_LOG_QUEUE_SIZE);
+};
+
 struct nvmet_ctrl {
 	struct nvmet_subsys	*subsys;
 	struct nvmet_sq		**sqs;
@@ -246,6 +289,7 @@ struct nvmet_ctrl {
 	u8			*dh_key;
 	size_t			dh_keysize;
 #endif
+	struct nvmet_pr_log_mgr pr_log_mgr;
 };
 
 struct nvmet_subsys {
@@ -396,6 +440,9 @@ struct nvmet_req {
 			struct work_struct	zmgmt_work;
 		} z;
 #endif /* CONFIG_BLK_DEV_ZONED */
+		struct {
+			struct work_struct	abort_work;
+		} r;
 	};
 	int			sg_cnt;
 	int			metadata_sg_cnt;
@@ -412,6 +459,7 @@ struct nvmet_req {
 	struct device		*p2p_client;
 	u16			error_loc;
 	u64			error_slba;
+	struct nvmet_pr_per_ctrl_ref *pc_ref;
 };
 
 #define NVMET_MAX_MPOOL_BVEC	16
@@ -498,7 +546,8 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);
 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
-		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
+		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp,
+		uuid_t *hostid);
 struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
 		const char *hostnqn, u16 cntlid,
 		struct nvmet_req *req);
@@ -761,4 +810,18 @@ static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl)
 static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; }
 #endif
 
+int nvmet_pr_init_ns(struct nvmet_ns *ns);
+u16 nvmet_parse_pr_cmd(struct nvmet_req *req);
+u16 nvmet_pr_check_cmd_access(struct nvmet_req *req);
+int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl);
+void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl);
+void nvmet_pr_exit_ns(struct nvmet_ns *ns);
+void nvmet_execute_get_log_page_resv(struct nvmet_req *req);
+u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask);
+u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req);
+u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req);
+static inline void nvmet_pr_put_ns_pc_ref(struct nvmet_pr_per_ctrl_ref *pc_ref)
+{
+	percpu_ref_put(&pc_ref->ref);
+}
 #endif /* _NVMET_H */
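
The confirm_done/free_done pair in nvmet_pr_per_ctrl_ref above points at the
standard percpu_ref kill/confirm/resurrect handshake, and the waiting it
entails is why the struct nvmet_pr comment asks for a semaphore rather than
a mutex. A hedged sketch of that pattern follows; the function names are
illustrative, the release callback is assumed to be the one registered at
percpu_ref_init() time, and this is not the code from the suppressed pr.c.

/* confirm callback: after this, no percpu_ref_tryget_live() succeeds */
static void nvmet_pr_confirm_pc_ref(struct percpu_ref *ref)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref =
		container_of(ref, struct nvmet_pr_per_ctrl_ref, ref);

	complete(&pc_ref->confirm_done);
}

/* assumed release callback: the last in-flight command has dropped its ref */
static void nvmet_pr_release_pc_ref(struct percpu_ref *ref)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref =
		container_of(ref, struct nvmet_pr_per_ctrl_ref, ref);

	complete(&pc_ref->free_done);
}

/* "preempt and abort": drain one controller's I/O on this namespace */
static void nvmet_pr_drain_pc_ref(struct nvmet_pr_per_ctrl_ref *pc_ref)
{
	percpu_ref_kill_and_confirm(&pc_ref->ref, nvmet_pr_confirm_pc_ref);
	wait_for_completion(&pc_ref->confirm_done);
	wait_for_completion(&pc_ref->free_done);

	/* make the ref usable again for subsequent commands */
	reinit_completion(&pc_ref->confirm_done);
	reinit_completion(&pc_ref->free_done);
	percpu_ref_resurrect(&pc_ref->ref);
}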

drivers/nvme/target/pr.c (new file, 1156 lines added)

File diff suppressed because it is too large.

include/linux/nvme.h

@@ -2045,7 +2045,7 @@ enum {
 	NVME_PR_LOG_EMPTY_LOG_PAGE		= 0x00,
 	NVME_PR_LOG_REGISTRATION_PREEMPTED	= 0x01,
 	NVME_PR_LOG_RESERVATION_RELEASED	= 0x02,
-	NVME_PR_LOG_RESERVATOPM_PREEMPTED	= 0x03,
+	NVME_PR_LOG_RESERVATION_PREEMPTED	= 0x03,
 };
 
 enum {