Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: add gfx10 hw debug mode enable and disable calls

Similar to GFX9 debug devices, set the hardware debug mode by draining
the SPI appropriately prior to the mode setting request.

Because GFX10 has waves allocated by the work group boundary and each
SE's SPI instances do not communicate, the SPI drain time is much longer.
This long drain time will be fixed for GFX11 onwards.

Also remove a bunch of deprecated misplaced references for GFX10.3.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Jonathan Kim and committed by
Alex Deucher
d13f050f bb13d763

+127 -145
+96
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
··· 21 21 */ 22 22 #include "amdgpu.h" 23 23 #include "amdgpu_amdkfd.h" 24 + #include "amdgpu_amdkfd_gfx_v10.h" 24 25 #include "gc/gc_10_1_0_offset.h" 25 26 #include "gc/gc_10_1_0_sh_mask.h" 26 27 #include "athub/athub_2_0_0_offset.h" ··· 710 709 adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); 711 710 } 712 711 712 + /* 713 + * GFX10 helper for wave launch stall requirements on debug trap setting. 714 + * 715 + * vmid: 716 + * Target VMID to stall/unstall. 717 + * 718 + * stall: 719 + * 0-unstall wave launch (enable), 1-stall wave launch (disable). 720 + * After wavefront launch has been stalled, allocated waves must drain from 721 + * SPI in order for debug trap settings to take effect on those waves. 722 + * This is roughly a ~3500 clock cycle wait on SPI where a read on 723 + * SPI_GDBG_WAVE_CNTL translates to ~32 clock cycles. 724 + * KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY indicates the number of reads required. 725 + * 726 + * NOTE: We can afford to clear the entire STALL_VMID field on unstall 727 + * because current GFX10 chips cannot support multi-process debugging due to 728 + * trap configuration and masking being limited to global scope. Always 729 + * assume single process conditions. 730 + * 731 + */ 732 + 733 + #define KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY 110 734 + static void kgd_gfx_v10_set_wave_launch_stall(struct amdgpu_device *adev, uint32_t vmid, bool stall) 735 + { 736 + uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); 737 + int i; 738 + 739 + data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID, 740 + stall ? 
1 << vmid : 0); 741 + 742 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); 743 + 744 + if (!stall) 745 + return; 746 + 747 + for (i = 0; i < KGD_GFX_V10_WAVE_LAUNCH_SPI_DRAIN_LATENCY; i++) 748 + RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); 749 + } 750 + 751 + uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, 752 + bool restore_dbg_registers, 753 + uint32_t vmid) 754 + { 755 + 756 + mutex_lock(&adev->grbm_idx_mutex); 757 + 758 + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); 759 + 760 + /* assume gfx off is disabled for the debug session if rlc restore not supported. */ 761 + if (restore_dbg_registers) { 762 + uint32_t data = 0; 763 + 764 + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, 765 + VMID_SEL, 1 << vmid); 766 + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, 767 + TRAP_EN, 1); 768 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); 769 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); 770 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); 771 + 772 + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); 773 + 774 + mutex_unlock(&adev->grbm_idx_mutex); 775 + 776 + return 0; 777 + } 778 + 779 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 780 + 781 + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); 782 + 783 + mutex_unlock(&adev->grbm_idx_mutex); 784 + 785 + return 0; 786 + } 787 + 788 + uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, 789 + bool keep_trap_enabled, 790 + uint32_t vmid) 791 + { 792 + mutex_lock(&adev->grbm_idx_mutex); 793 + 794 + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, true); 795 + 796 + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 797 + 798 + kgd_gfx_v10_set_wave_launch_stall(adev, vmid, false); 799 + 800 + mutex_unlock(&adev->grbm_idx_mutex); 801 + 802 + return 0; 803 + } 804 + 713 805 static void program_trap_handler_settings(struct amdgpu_device *adev, 714 806 uint32_t vmid, uint64_t tba_addr, uint64_t 
tma_addr, 715 807 uint32_t inst) ··· 846 752 .get_atc_vmid_pasid_mapping_info = 847 753 get_atc_vmid_pasid_mapping_info, 848 754 .set_vm_context_page_table_base = set_vm_context_page_table_base, 755 + .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, 756 + .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, 849 757 .program_trap_handler_settings = program_trap_handler_settings, 850 758 };
+28
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
··· 1 + /* 2 + * Copyright 2023 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + */ 22 + 23 + uint32_t kgd_gfx_v10_enable_debug_trap(struct amdgpu_device *adev, 24 + bool restore_dbg_registers, 25 + uint32_t vmid); 26 + uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev, 27 + bool keep_trap_enabled, 28 + uint32_t vmid);
+3 -145
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
··· 22 22 #include <linux/mmu_context.h> 23 23 #include "amdgpu.h" 24 24 #include "amdgpu_amdkfd.h" 25 + #include "amdgpu_amdkfd_gfx_v10.h" 25 26 #include "gc/gc_10_3_0_offset.h" 26 27 #include "gc/gc_10_3_0_sh_mask.h" 27 28 #include "oss/osssys_5_0_0_offset.h" ··· 655 654 unlock_srbm(adev); 656 655 } 657 656 658 - #if 0 659 - uint32_t enable_debug_trap_v10_3(struct amdgpu_device *adev, 660 - uint32_t trap_debug_wave_launch_mode, 661 - uint32_t vmid) 662 - { 663 - uint32_t data = 0; 664 - uint32_t orig_wave_cntl_value; 665 - uint32_t orig_stall_vmid; 666 - 667 - mutex_lock(&adev->grbm_idx_mutex); 668 - 669 - orig_wave_cntl_value = RREG32(SOC15_REG_OFFSET(GC, 670 - 0, 671 - mmSPI_GDBG_WAVE_CNTL)); 672 - orig_stall_vmid = REG_GET_FIELD(orig_wave_cntl_value, 673 - SPI_GDBG_WAVE_CNTL, 674 - STALL_VMID); 675 - 676 - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); 677 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); 678 - 679 - data = 0; 680 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); 681 - 682 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), orig_stall_vmid); 683 - 684 - mutex_unlock(&adev->grbm_idx_mutex); 685 - 686 - return 0; 687 - } 688 - 689 - uint32_t disable_debug_trap_v10_3(struct amdgpu_device *adev) 690 - { 691 - mutex_lock(&adev->grbm_idx_mutex); 692 - 693 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); 694 - 695 - mutex_unlock(&adev->grbm_idx_mutex); 696 - 697 - return 0; 698 - } 699 - 700 - uint32_t set_wave_launch_trap_override_v10_3(struct amdgpu_device *adev, 701 - uint32_t trap_override, 702 - uint32_t trap_mask) 703 - { 704 - uint32_t data = 0; 705 - 706 - mutex_lock(&adev->grbm_idx_mutex); 707 - 708 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); 709 - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 1); 710 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); 711 - 712 - data = 0; 713 - data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, 714 - EXCP_EN, 
trap_mask); 715 - data = REG_SET_FIELD(data, SPI_GDBG_TRAP_MASK, 716 - REPLACE, trap_override); 717 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), data); 718 - 719 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); 720 - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_RA, 0); 721 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); 722 - 723 - mutex_unlock(&adev->grbm_idx_mutex); 724 - 725 - return 0; 726 - } 727 - 728 - uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev, 729 - uint8_t wave_launch_mode, 730 - uint32_t vmid) 731 - { 732 - uint32_t data = 0; 733 - bool is_stall_mode; 734 - bool is_mode_set; 735 - 736 - is_stall_mode = (wave_launch_mode == 4); 737 - is_mode_set = (wave_launch_mode != 0 && wave_launch_mode != 4); 738 - 739 - mutex_lock(&adev->grbm_idx_mutex); 740 - 741 - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, 742 - VMID_MASK, is_mode_set ? 1 << vmid : 0); 743 - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL2, 744 - MODE, is_mode_set ? wave_launch_mode : 0); 745 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL2), data); 746 - 747 - data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); 748 - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, 749 - STALL_VMID, is_stall_mode ? 1 << vmid : 0); 750 - data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, 751 - STALL_RA, is_stall_mode ? 1 : 0); 752 - WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL), data); 753 - 754 - mutex_unlock(&adev->grbm_idx_mutex); 755 - 756 - return 0; 757 - } 758 - 759 - /* kgd_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values 760 - * The values read are: 761 - * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads. 762 - * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads. 763 - * wrm_offload_wait_time -- Wait Count for WAIT_REG_MEM Offloads. 764 - * gws_wait_time -- Wait Count for Global Wave Syncs. 765 - * que_sleep_wait_time -- Wait Count for Dequeue Retry. 
766 - * sch_wave_wait_time -- Wait Count for Scheduling Wave Message. 767 - * sem_rearm_wait_time -- Wait Count for Semaphore re-arm. 768 - * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 769 - */ 770 - void get_iq_wait_times_v10_3(struct amdgpu_device *adev, 771 - uint32_t *wait_times, uint32_t inst) 772 - 773 - { 774 - *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); 775 - } 776 - 777 - void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev, 778 - uint32_t wait_times, 779 - uint32_t grace_period, 780 - uint32_t *reg_offset, 781 - uint32_t *reg_data, 782 - uint32_t inst) 783 - { 784 - *reg_data = wait_times; 785 - 786 - *reg_data = REG_SET_FIELD(*reg_data, 787 - CP_IQ_WAIT_TIME2, 788 - SCH_WAVE, 789 - grace_period); 790 - 791 - *reg_offset = mmCP_IQ_WAIT_TIME2; 792 - } 793 - #endif 794 - 795 657 const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { 796 658 .program_sh_mem_settings = program_sh_mem_settings_v10_3, 797 659 .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v10_3, ··· 672 808 .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3, 673 809 .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, 674 810 .program_trap_handler_settings = program_trap_handler_settings_v10_3, 675 - #if 0 676 - .enable_debug_trap = enable_debug_trap_v10_3, 677 - .disable_debug_trap = disable_debug_trap_v10_3, 678 - .set_wave_launch_trap_override = set_wave_launch_trap_override_v10_3, 679 - .set_wave_launch_mode = set_wave_launch_mode_v10_3, 680 - .get_iq_wait_times = get_iq_wait_times_v10_3, 681 - .build_grace_period_packet_info = build_grace_period_packet_info_v10_3, 682 - #endif 811 + .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, 812 + .disable_debug_trap = kgd_gfx_v10_disable_debug_trap 683 813 };