accel/habanalabs: add event queue extra validation

In order to increase reliability of the event queue interface,
we apply to Gaudi2 the same mechanism we have in Gaudi1.
The extra validation is basically checking that the received
event index matches the expected index.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
This commit is contained in:
Ofir Bitton 2023-05-21 10:24:13 +03:00 committed by Oded Gabbay
parent 19aa21b980
commit fac91dd54f
2 changed files with 7 additions and 1 deletions

View File

@ -430,7 +430,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
cur_eqe_index = FIELD_GET(EQ_CTL_INDEX_MASK, cur_eqe);
if ((hdev->event_queue.check_eqe_index) &&
(((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK) != cur_eqe_index)) {
dev_dbg(hdev->dev,
dev_err(hdev->dev,
"EQE %#x in queue is ready but index does not match %d!=%d",
cur_eqe,
((eq->prev_eqe_index + 1) & EQ_CTL_INDEX_MASK),

View File

@ -3619,6 +3619,12 @@ static int gaudi2_sw_init(struct hl_device *hdev)
prop->supports_compute_reset = true;
/* Event queue sanity check added in FW version 1.11 */
if (hl_is_fw_sw_ver_below(hdev, 1, 11))
hdev->event_queue.check_eqe_index = false;
else
hdev->event_queue.check_eqe_index = true;
hdev->asic_funcs->set_pci_memory_regions(hdev);
rc = gaudi2_special_blocks_iterator_config(hdev);