scsi: lpfc: Trigger SLI4 firmware dump before doing driver cleanup

Extraneous teardown routines are present in the firmware dump path, causing
altered states in firmware captures.

When a firmware dump is requested via sysfs, trigger the dump immediately
without tearing down structures or changing adapter state.

The driver shall rely on pre-existing firmware error state cleanup
handlers to restore the adapter.

Link: https://lore.kernel.org/r/20211204002644.116455-6-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
This commit is contained in:
James Smart 2021-12-03 16:26:40 -08:00 committed by Martin K. Petersen
parent 8ed190a919
commit 7dd2e2a923
4 changed files with 51 additions and 33 deletions

View File

@ -1021,7 +1021,6 @@ struct lpfc_hba {
#define HBA_DEVLOSS_TMO 0x2000 /* HBA in devloss timeout */
#define HBA_RRQ_ACTIVE 0x4000 /* process the rrq active list */
#define HBA_IOQ_FLUSH 0x8000 /* FCP/NVME I/O queues being flushed */
#define HBA_FW_DUMP_OP 0x10000 /* Skips fn reset before FW dump */
#define HBA_RECOVERABLE_UE 0x20000 /* Firmware supports recoverable UE */
#define HBA_FORCED_LINK_SPEED 0x40000 /*
* Firmware supports Forced Link Speed
@ -1038,6 +1037,7 @@ struct lpfc_hba {
#define HBA_HBEAT_TMO 0x8000000 /* HBEAT initiated after timeout */
#define HBA_FLOGI_OUTSTANDING 0x10000000 /* FLOGI is outstanding */
struct completion *fw_dump_cmpl; /* cmpl event tracker for fw_dump */
uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/
struct lpfc_dmabuf slim2p;

View File

@ -1709,25 +1709,25 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
before_fc_flag = phba->pport->fc_flag;
sriov_nr_virtfn = phba->cfg_sriov_nr_virtfn;
/* Disable SR-IOV virtual functions if enabled */
if (phba->cfg_sriov_nr_virtfn) {
pci_disable_sriov(pdev);
phba->cfg_sriov_nr_virtfn = 0;
if (opcode == LPFC_FW_DUMP) {
init_completion(&online_compl);
phba->fw_dump_cmpl = &online_compl;
} else {
/* Disable SR-IOV virtual functions if enabled */
if (phba->cfg_sriov_nr_virtfn) {
pci_disable_sriov(pdev);
phba->cfg_sriov_nr_virtfn = 0;
}
status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
if (status != 0)
return status;
/* wait for the device to be quiesced before firmware reset */
msleep(100);
}
if (opcode == LPFC_FW_DUMP)
phba->hba_flag |= HBA_FW_DUMP_OP;
status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE);
if (status != 0) {
phba->hba_flag &= ~HBA_FW_DUMP_OP;
return status;
}
/* wait for the device to be quiesced before firmware reset */
msleep(100);
reg_val = readl(phba->sli4_hba.conf_regs_memmap_p +
LPFC_CTL_PDEV_CTL_OFFSET);
@ -1756,24 +1756,42 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode)
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"3153 Fail to perform the requested "
"access: x%x\n", reg_val);
if (phba->fw_dump_cmpl)
phba->fw_dump_cmpl = NULL;
return rc;
}
/* keep the original port state */
if (before_fc_flag & FC_OFFLINE_MODE)
if (before_fc_flag & FC_OFFLINE_MODE) {
if (phba->fw_dump_cmpl)
phba->fw_dump_cmpl = NULL;
goto out;
}
init_completion(&online_compl);
job_posted = lpfc_workq_post_event(phba, &status, &online_compl,
LPFC_EVT_ONLINE);
if (!job_posted)
goto out;
wait_for_completion(&online_compl);
/* Firmware dump will trigger an HA_ERATT event, and
* lpfc_handle_eratt_s4 routine already handles bringing the port back
* online.
*/
if (opcode == LPFC_FW_DUMP) {
wait_for_completion(phba->fw_dump_cmpl);
} else {
init_completion(&online_compl);
job_posted = lpfc_workq_post_event(phba, &status, &online_compl,
LPFC_EVT_ONLINE);
if (!job_posted)
goto out;
wait_for_completion(&online_compl);
}
out:
/* in any case, restore the virtual functions enabled as before */
if (sriov_nr_virtfn) {
/* If fw_dump was performed, first disable to clean up */
if (opcode == LPFC_FW_DUMP) {
pci_disable_sriov(pdev);
phba->cfg_sriov_nr_virtfn = 0;
}
sriov_err =
lpfc_sli_probe_sriov_nr_virtfn(phba, sriov_nr_virtfn);
if (!sriov_err)

View File

@ -869,10 +869,16 @@ lpfc_work_done(struct lpfc_hba *phba)
if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
lpfc_sli4_post_async_mbox(phba);
if (ha_copy & HA_ERATT)
if (ha_copy & HA_ERATT) {
/* Handle the error attention event */
lpfc_handle_eratt(phba);
if (phba->fw_dump_cmpl) {
complete(phba->fw_dump_cmpl);
phba->fw_dump_cmpl = NULL;
}
}
if (ha_copy & HA_MBATT)
lpfc_sli_handle_mb_event(phba);

View File

@ -5046,12 +5046,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba)
phba->fcf.fcf_flag = 0;
spin_unlock_irq(&phba->hbalock);
/* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */
if (phba->hba_flag & HBA_FW_DUMP_OP) {
phba->hba_flag &= ~HBA_FW_DUMP_OP;
return rc;
}
/* Now physically reset the device */
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"0389 Performing PCI function reset!\n");