Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Check int source id for utcl2 poison event

Traditional utcl2 fault_status polling does not
work in an SRIOV environment. Polling of the fault
status register from the guest side is dropped
by the hardware.

The driver should instead check the utcl2 interrupt
source id to identify a utcl2 poison event. The
source id is set to 1 when poisoned data interrupts
are signaled.

v2: drop the unused local variable (Tao)

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Hawking Zhang and committed by
Alex Deucher
db6341a9 88c511de

+2 -17
+1 -17
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
··· 431 431 client_id == SOC15_IH_CLIENTID_UTCL2) { 432 432 struct kfd_vm_fault_info info = {0}; 433 433 uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); 434 - uint32_t node_id = SOC15_NODEID_FROM_IH_ENTRY(ih_ring_entry); 435 - uint32_t vmid_type = SOC15_VMID_TYPE_FROM_IH_ENTRY(ih_ring_entry); 436 - int hub_inst = 0; 437 434 struct kfd_hsa_memory_exception_data exception_data; 438 435 439 - /* gfxhub */ 440 - if (!vmid_type && dev->adev->gfx.funcs->ih_node_to_logical_xcc) { 441 - hub_inst = dev->adev->gfx.funcs->ih_node_to_logical_xcc(dev->adev, 442 - node_id); 443 - if (hub_inst < 0) 444 - hub_inst = 0; 445 - } 446 - 447 - /* mmhub */ 448 - if (vmid_type && client_id == SOC15_IH_CLIENTID_VMC) 449 - hub_inst = node_id / 4; 450 - 451 - if (amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev, 452 - hub_inst, vmid_type)) { 436 + if (source_id == SOC15_INTSRC_VMC_UTCL2_POISON) { 453 437 event_interrupt_poison_consumption_v9(dev, pasid, client_id); 454 438 return; 455 439 }
+1
drivers/gpu/drm/amd/amdkfd/soc15_int.h
··· 29 29 #define SOC15_INTSRC_CP_BAD_OPCODE 183 30 30 #define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 31 31 #define SOC15_INTSRC_VMC_FAULT 0 32 + #define SOC15_INTSRC_VMC_UTCL2_POISON 1 32 33 #define SOC15_INTSRC_SDMA_TRAP 224 33 34 #define SOC15_INTSRC_SDMA_ECC 220 34 35 #define SOC21_INTSRC_SDMA_TRAP 49