Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

accel/habanalabs: revise print on EQ heartbeat failure

Don't print the "previous EQ index" value in case of an EQ heartbeat
failure, because it is incremented along with the EQ CI and is therefore
redundant.

In addition, as the CPU-CP PI is zeroed when it reaches a value that is
twice the queue size, also print the CI value with the same wrap-around
applied, to make it easier to compare the two values.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>

authored by

Tomer Tayar and committed by
Ofir Bitton
795f93e6 9ee446f9

+10 -9
+10 -9
drivers/accel/habanalabs/common/device.c
··· 1064 1064 1065 1065 static bool hl_device_eq_heartbeat_received(struct hl_device *hdev) 1066 1066 { 1067 + struct eq_heartbeat_debug_info *heartbeat_debug_info = &hdev->heartbeat_debug_info; 1068 + u32 cpu_q_id = heartbeat_debug_info->cpu_queue_id, pq_pi_mask = (HL_QUEUE_LENGTH << 1) - 1; 1067 1069 struct asic_fixed_properties *prop = &hdev->asic_prop; 1068 - u32 cpu_q_id; 1069 1070 1070 1071 if (!prop->cpucp_info.eq_health_check_supported) 1071 1072 return true; 1072 1073 1073 1074 if (!hdev->eq_heartbeat_received) { 1074 - cpu_q_id = hdev->heartbeat_debug_info.cpu_queue_id; 1075 - 1076 1075 dev_err(hdev->dev, "EQ heartbeat event was not received!\n"); 1077 1076 1078 - dev_err(hdev->dev, "Heartbeat events counter: %u, Q_PI: %u, Q_CI: %u, EQ CI: %u, EQ prev: %u\n", 1079 - hdev->heartbeat_debug_info.heartbeat_event_counter, 1080 - hdev->kernel_queues[cpu_q_id].pi, 1081 - atomic_read(&hdev->kernel_queues[cpu_q_id].ci), 1082 - hdev->event_queue.ci, 1083 - hdev->event_queue.prev_eqe_index); 1077 + dev_err(hdev->dev, 1078 + "Heartbeat events counter: %u, EQ CI: %u, PQ PI: %u, PQ CI: %u (%u)\n", 1079 + heartbeat_debug_info->heartbeat_event_counter, 1080 + hdev->event_queue.ci, 1081 + hdev->kernel_queues[cpu_q_id].pi, 1082 + atomic_read(&hdev->kernel_queues[cpu_q_id].ci), 1083 + atomic_read(&hdev->kernel_queues[cpu_q_id].ci) & pq_pi_mask); 1084 + 1084 1085 return false; 1085 1086 } 1086 1087