Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Add msg handlers for SRIOV RAS Telemetry

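Add message handlers for RAS telemetry: introduce a req_ras_err_count
virt op that sends IDH_REQ_RAS_ERROR_COUNT, map that request to the
IDH_RAS_ERROR_COUNT_READY event, and make the mailbox receive path
acknowledge an IDH_FAIL message and return -EINVAL instead of treating
it as an unmatched event.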

Signed-off-by: Victor Skvortsov <victor.skvortsov@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Victor Skvortsov and committed by Alex Deucher
9928509d 60c58d72

+15 -2
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
··· 95 95 void (*ras_poison_handler)(struct amdgpu_device *adev, 96 96 enum amdgpu_ras_block block); 97 97 bool (*rcvd_ras_intr)(struct amdgpu_device *adev); 98 + int (*req_ras_err_count)(struct amdgpu_device *adev); 98 99 }; 99 100 100 101 /*
+14 -2
drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
··· 61 61 static int xgpu_nv_mailbox_rcv_msg(struct amdgpu_device *adev, 62 62 enum idh_event event) 63 63 { 64 + int r = 0; 64 65 u32 reg; 65 66 66 67 reg = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW0); 67 - if (reg != event) 68 + if (reg == IDH_FAIL) 69 + r = -EINVAL; 70 + else if (reg != event) 68 71 return -ENOENT; 69 72 70 73 xgpu_nv_mailbox_send_ack(adev); 71 74 72 - return 0; 75 + return r; 73 76 } 74 77 75 78 static uint8_t xgpu_nv_peek_ack(struct amdgpu_device *adev) ··· 180 177 case IDH_RAS_POISON: 181 178 if (data1 != 0) 182 179 event = IDH_RAS_POISON_READY; 180 + break; 181 + case IDH_REQ_RAS_ERROR_COUNT: 182 + event = IDH_RAS_ERROR_COUNT_READY; 183 183 break; 184 184 default: 185 185 break; ··· 462 456 return (msg == IDH_RAS_ERROR_DETECTED || msg == 0xFFFFFFFF); 463 457 } 464 458 459 + static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev) 460 + { 461 + return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT); 462 + } 463 + 465 464 const struct amdgpu_virt_ops xgpu_nv_virt_ops = { 466 465 .req_full_gpu = xgpu_nv_request_full_gpu_access, 467 466 .rel_full_gpu = xgpu_nv_release_full_gpu_access, ··· 477 466 .trans_msg = xgpu_nv_mailbox_trans_msg, 478 467 .ras_poison_handler = xgpu_nv_ras_poison_handler, 479 468 .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr, 469 + .req_ras_err_count = xgpu_nv_req_ras_err_count, 480 470 };