Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Add kernel parameter to stop queue eviction on vm fault

This is to keep the wavefront context for debugging purposes.

Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Oak Zeng and committed by
Alex Deucher
6d909c5d 2f669734

+18 -4
+7
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
··· 751 751 module_param(no_system_mem_limit, bool, 0644); 752 752 MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)"); 753 753 754 + /** 755 + * DOC: no_queue_eviction_on_vm_fault (int) 756 + * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction). 757 + */ 758 + int amdgpu_no_queue_eviction_on_vm_fault = 0; 759 + MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); 760 + module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); 754 761 #endif 755 762 756 763 /**
+3 -2
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
··· 80 80 ihre->source_id == CIK_INTSRC_SDMA_TRAP || 81 81 ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || 82 82 ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE || 83 - ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || 84 - ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT; 83 + ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || 84 + ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && 85 + !amdgpu_no_queue_eviction_on_vm_fault); 85 86 } 86 87 87 88 static void cik_event_interrupt_wq(struct kfd_dev *dev,
+3 -2
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
··· 98 98 source_id == SOC15_INTSRC_SDMA_TRAP || 99 99 source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || 100 100 source_id == SOC15_INTSRC_CP_BAD_OPCODE || 101 - client_id == SOC15_IH_CLIENTID_VMC || 101 + ((client_id == SOC15_IH_CLIENTID_VMC || 102 102 client_id == SOC15_IH_CLIENTID_VMC1 || 103 - client_id == SOC15_IH_CLIENTID_UTCL2; 103 + client_id == SOC15_IH_CLIENTID_UTCL2) && 104 + !amdgpu_no_queue_eviction_on_vm_fault); 104 105 } 105 106 106 107 static void event_interrupt_wq_v9(struct kfd_dev *dev,
+5
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
··· 169 169 /* Queue preemption timeout in ms */ 170 170 extern int queue_preemption_timeout_ms; 171 171 172 + /* 173 + * Don't evict process queues on vm fault 174 + */ 175 + extern int amdgpu_no_queue_eviction_on_vm_fault; 176 + 172 177 /* Enable eviction debug messages */ 173 178 extern bool debug_evictions; 174 179