nvme-fc: fix race between error recovery and creating association

There is a small race window between nvme-fc association creation and error
recovery. Fix this race condition by protecting access to the controller
state and the ASSOC_FAILED flag with the nvme-fc controller lock.

Signed-off-by: Michael Liang <mliang@purestorage.com>
Reviewed-by: Caleb Sander <csander@purestorage.com>
Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
This commit is contained in:
Michael Liang 2023-07-07 15:21:57 -06:00 committed by Keith Busch
parent 60e445bdfc
commit ee6fdc5055

View File

@ -2548,17 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
* the controller. Abort any ios on the association and let the * the controller. Abort any ios on the association and let the
* create_association error path resolve things. * create_association error path resolve things.
*/ */
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { enum nvme_ctrl_state state;
__nvme_fc_abort_outstanding_ios(ctrl, true); unsigned long flags;
spin_lock_irqsave(&ctrl->lock, flags);
state = ctrl->ctrl.state;
if (state == NVME_CTRL_CONNECTING) {
set_bit(ASSOC_FAILED, &ctrl->flags); set_bit(ASSOC_FAILED, &ctrl->flags);
spin_unlock_irqrestore(&ctrl->lock, flags);
__nvme_fc_abort_outstanding_ios(ctrl, true);
dev_warn(ctrl->ctrl.device, dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport error during (re)connect\n", "NVME-FC{%d}: transport error during (re)connect\n",
ctrl->cnum); ctrl->cnum);
return; return;
} }
spin_unlock_irqrestore(&ctrl->lock, flags);
/* Otherwise, only proceed if in LIVE state - e.g. on first error */ /* Otherwise, only proceed if in LIVE state - e.g. on first error */
if (ctrl->ctrl.state != NVME_CTRL_LIVE) if (state != NVME_CTRL_LIVE)
return; return;
dev_warn(ctrl->ctrl.device, dev_warn(ctrl->ctrl.device,
@ -3172,12 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
else else
ret = nvme_fc_recreate_io_queues(ctrl); ret = nvme_fc_recreate_io_queues(ctrl);
} }
spin_lock_irqsave(&ctrl->lock, flags);
if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
ret = -EIO; ret = -EIO;
if (ret) if (ret) {
spin_unlock_irqrestore(&ctrl->lock, flags);
goto out_term_aen_ops; goto out_term_aen_ops;
}
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
spin_unlock_irqrestore(&ctrl->lock, flags);
ctrl->ctrl.nr_reconnects = 0; ctrl->ctrl.nr_reconnects = 0;