Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: revert context to stop engine before mode2 reset

For some hangs caused by slow tests, the engine cannot be stopped, which
may cause resume to fail after the reset. In this case, force the engine
to halt by reverting the context addresses.

Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Victor Zhao and committed by
Alex Deucher
72fadb13 bfaced6e

+40
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 5039 5039 5040 5040 /* set guilty */ 5041 5041 drm_sched_increase_karma(s_job); 5042 + amdgpu_reset_prepare_hwcontext(adev, reset_context); 5042 5043 retry: 5043 5044 /* do hw reset */ 5044 5045 if (amdgpu_sriov_vf(adev)) {
+1
drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
··· 37 37 void (*utcl2_harvest)(struct amdgpu_device *adev); 38 38 void (*mode2_save_regs)(struct amdgpu_device *adev); 39 39 void (*mode2_restore_regs)(struct amdgpu_device *adev); 40 + void (*halt)(struct amdgpu_device *adev); 40 41 }; 41 42 42 43 struct amdgpu_gfxhub {
+36
drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
··· 646 646 WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, adev->gmc.MC_VM_MX_L1_TLB_CNTL); 647 647 } 648 648 649 + static void gfxhub_v2_1_halt(struct amdgpu_device *adev) 650 + { 651 + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; 652 + int i; 653 + uint32_t tmp; 654 + int time = 1000; 655 + 656 + gfxhub_v2_1_set_fault_enable_default(adev, false); 657 + 658 + for (i = 0; i <= 14; i++) { 659 + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, 660 + i * hub->ctx_addr_distance, ~0); 661 + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, 662 + i * hub->ctx_addr_distance, ~0); 663 + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, 664 + i * hub->ctx_addr_distance, 665 + 0); 666 + WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, 667 + i * hub->ctx_addr_distance, 668 + 0); 669 + } 670 + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 671 + while ((tmp & (GRBM_STATUS2__EA_BUSY_MASK | 672 + GRBM_STATUS2__EA_LINK_BUSY_MASK)) != 0 && 673 + time) { 674 + udelay(100); 675 + time--; 676 + tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 677 + } 678 + 679 + if (!time) { 680 + DRM_WARN("failed to wait for GRBM(EA) idle\n"); 681 + } 682 + } 683 + 649 684 const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { 650 685 .get_fb_location = gfxhub_v2_1_get_fb_location, 651 686 .get_mc_fb_offset = gfxhub_v2_1_get_mc_fb_offset, ··· 693 658 .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, 694 659 .mode2_save_regs = gfxhub_v2_1_save_regs, 695 660 .mode2_restore_regs = gfxhub_v2_1_restore_regs, 661 + .halt = gfxhub_v2_1_halt, 696 662 };
+2
drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c
··· 97 97 if (!amdgpu_sriov_vf(adev)) { 98 98 if (adev->gfxhub.funcs->mode2_save_regs) 99 99 adev->gfxhub.funcs->mode2_save_regs(adev); 100 + if (adev->gfxhub.funcs->halt) 101 + adev->gfxhub.funcs->halt(adev); 100 102 r = sienna_cichlid_mode2_suspend_ip(adev); 101 103 } 102 104