accel/habanalabs: change the heartbeat scheduling point

Currently we schedule the heartbeat thread at late init, only then
we set the INTS_REGISTER packet which enables events to be received
from firmware.

Init may take some time and we want to give firmware 2 full cycles of
heartbeat thread after it received INTS_REGISTER.

The patch will move the heartbeat thread scheduling to be after driver
is done with all initializations.

Signed-off-by: Farah Kassabri <fkassabri@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
This commit is contained in:
Farah Kassabri 2024-04-04 11:06:03 +03:00 committed by Ofir Bitton
parent 3309887c6f
commit 52fbab90ed

View File

@ -1150,23 +1150,6 @@ static int device_late_init(struct hl_device *hdev)
}
hdev->high_pll = hdev->asic_prop.high_pll;
if (hdev->heartbeat) {
hdev->heartbeat_debug_info.heartbeat_event_counter = 0;
/*
* Before scheduling the heartbeat driver will check if eq event has received.
* for the first schedule we need to set the indication as true then for the next
* one this indication will be true only if eq event was sent by FW.
*/
hdev->eq_heartbeat_received = true;
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
schedule_delayed_work(&hdev->work_heartbeat,
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}
hdev->late_init_done = true;
return 0;
@ -1183,9 +1166,6 @@ static void device_late_fini(struct hl_device *hdev)
if (!hdev->late_init_done)
return;
if (hdev->heartbeat)
cancel_delayed_work_sync(&hdev->work_heartbeat);
if (hdev->asic_funcs->late_fini)
hdev->asic_funcs->late_fini(hdev);
@ -1286,8 +1266,12 @@ static void hl_abort_waiting_for_completions(struct hl_device *hdev)
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset,
bool skip_wq_flush)
{
if (hard_reset)
if (hard_reset) {
if (hdev->heartbeat)
cancel_delayed_work_sync(&hdev->work_heartbeat);
device_late_fini(hdev);
}
/*
* Halt the engines and disable interrupts so we won't get any more
@ -1565,6 +1549,26 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
}
}
static inline void device_heartbeat_schedule(struct hl_device *hdev)
{
if (!hdev->heartbeat)
return;
hdev->heartbeat_debug_info.heartbeat_event_counter = 0;
/*
* Before scheduling the heartbeat driver will check if eq event has received.
* for the first schedule we need to set the indication as true then for the next
* one this indication will be true only if eq event was sent by FW.
*/
hdev->eq_heartbeat_received = true;
INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
schedule_delayed_work(&hdev->work_heartbeat,
usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}
/*
* hl_device_reset - reset the device
*
@ -1934,6 +1938,8 @@ kill_processes:
if (hard_reset) {
hdev->reset_info.hard_reset_cnt++;
device_heartbeat_schedule(hdev);
/* After reset is done, we are ready to receive events from
* the F/W. We can't do it before because we will ignore events
* and if those events are fatal, we won't know about it and
@ -2368,6 +2374,12 @@ int hl_device_init(struct hl_device *hdev)
goto out_disabled;
}
/* Scheduling the EQ heartbeat thread must come after driver is done with all
* initializations, as we want to make sure the FW gets enough time to be prepared
* to respond to heartbeat packets.
*/
device_heartbeat_schedule(hdev);
dev_notice(hdev->dev,
"Successfully added device %s to habanalabs driver\n",
dev_name(&(hdev)->pdev->dev));