accel/habanalabs: revise print on EQ heartbeat failure
Don't print the "previous EQ index" value in case of an EQ heartbeat failure, because it is incremented along with the EQ CI and is therefore redundant. In addition, as the CPU-CP PI is zeroed when it reaches a value that is twice the queue size, also print the CI value with a similar wraparound applied, to make it easier to compare the values. Signed-off-by: Tomer Tayar <ttayar@habana.ai> Reviewed-by: Ofir Bitton <obitton@habana.ai> Signed-off-by: Ofir Bitton <obitton@habana.ai>
This commit is contained in:
parent
9ee446f9b5
commit
795f93e650
@ -1064,23 +1064,24 @@ static bool is_pci_link_healthy(struct hl_device *hdev)
|
||||
|
||||
static bool hl_device_eq_heartbeat_received(struct hl_device *hdev)
|
||||
{
|
||||
struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info;
|
||||
u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1;
|
||||
struct asic_fixed_properties *prop = &hdev->asic_prop;
|
||||
u32 cpu_q_id;
|
||||
|
||||
if (!prop->cpucp_info.eq_health_check_supported)
|
||||
return true;
|
||||
|
||||
if (!hdev->eq_heartbeat_received) {
|
||||
cpu_q_id = hdev->heartbeat_debug_info.cpu_queue_id;
|
||||
|
||||
dev_err(hdev->dev, "EQ heartbeat event was not received!\n");
|
||||
|
||||
dev_err(hdev->dev, "Heartbeat events counter: %u, Q_PI: %u, Q_CI: %u, EQ CI: %u, EQ prev: %u\n",
|
||||
hdev->heartbeat_debug_info.heartbeat_event_counter,
|
||||
hdev->kernel_queues[cpu_q_id].pi,
|
||||
atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
|
||||
hdev->event_queue.ci,
|
||||
hdev->event_queue.prev_eqe_index);
|
||||
dev_err(hdev->dev,
|
||||
"Heartbeat events counter: %u, EQ CI: %u, PQ PI: %u, PQ CI: %u (%u)\n",
|
||||
heartbeat_debug_info->heartbeat_event_counter,
|
||||
hdev->event_queue.ci,
|
||||
hdev->kernel_queues[cpu_q_id].pi,
|
||||
atomic_read(&hdev->kernel_queues[cpu_q_id].ci),
|
||||
atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user