Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Trap handler support for expert scheduling mode

The trap may be entered with dependency checking disabled.
Wait for dependency counters and save/restore scheduling mode.

v2:

Use ttmp1 instead of ttmp11. ttmp11 is not zero-initialized.
While the trap handler does zero this field before use, a user-mode
second-level trap handler could not rely on this being zero when
using an older kernel-mode driver.

v3:

Use ttmp11 primarily but copy to ttmp1 before jumping to the
second-level trap handler. ttmp1 is inspectable by a debugger, and
unexpected bits in its unused space may regress existing software.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Lancelot Six <lancelot.six@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 423888879412e94725ca2bdccd89414887d98e31)
Cc: stable@vger.kernel.org

Authored by Jay Cornwall and committed by Alex Deucher.
b7851f8c bf2084a7

+73 -26
+36 -26
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
··· 3644 3644 }; 3645 3645 3646 3646 static const uint32_t cwsr_trap_gfx12_hex[] = { 3647 - 0xbfa00001, 0xbfa002a2, 3648 - 0xb0804009, 0xb8f8f804, 3647 + 0xbfa00001, 0xbfa002b2, 3648 + 0xb0804009, 0xb8eef81a, 3649 + 0xbf880000, 0xb980081a, 3650 + 0x00000000, 0xb8f8f804, 3651 + 0x9177ff77, 0x0c000000, 3652 + 0x846e9a6e, 0x8c776e77, 3649 3653 0x9178ff78, 0x00008c00, 3650 3654 0xb8fbf811, 0x8b6eff78, 3651 3655 0x00004000, 0xbfa10008, 3652 3656 0x8b6eff7b, 0x00000080, 3653 3657 0xbfa20018, 0x8b6ea07b, 3654 - 0xbfa20042, 0xbf830010, 3658 + 0xbfa2004a, 0xbf830010, 3655 3659 0xb8fbf811, 0xbfa0fffb, 3656 3660 0x8b6eff7b, 0x00000bd0, 3657 3661 0xbfa20010, 0xb8eef812, ··· 3666 3662 0xf0000000, 0xbfa20005, 3667 3663 0x8b6fff6f, 0x00000200, 3668 3664 0xbfa20002, 0x8b6ea07b, 3669 - 0xbfa2002c, 0xbefa4d82, 3665 + 0xbfa20034, 0xbefa4d82, 3670 3666 0xbf8a0000, 0x84fa887a, 3671 3667 0xbf0d8f7b, 0xbfa10002, 3672 3668 0x8c7bff7b, 0xffff0000, 3673 - 0xf4601bbd, 0xf8000010, 3674 - 0xbf8a0000, 0x846e976e, 3675 - 0x9177ff77, 0x00800000, 3676 - 0x8c776e77, 0xf4603bbd, 3677 - 0xf8000000, 0xbf8a0000, 3678 - 0xf4603ebd, 0xf8000008, 3679 - 0xbf8a0000, 0x8bee6e6e, 3680 - 0xbfa10001, 0xbe80486e, 3681 - 0x8b6eff6d, 0xf0000000, 3682 - 0xbfa20009, 0xb8eef811, 3683 - 0x8b6eff6e, 0x00000080, 3684 - 0xbfa20007, 0x8c78ff78, 3685 - 0x00004000, 0x80ec886c, 3686 - 0x82ed806d, 0xbfa00002, 3687 - 0x806c846c, 0x826d806d, 3688 - 0x8b6dff6d, 0x0000ffff, 3689 - 0x8bfe7e7e, 0x8bea6a6a, 3690 - 0x85788978, 0xb9783244, 3669 + 0x8b6eff77, 0x0c000000, 3670 + 0x916dff6d, 0x0c000000, 3671 + 0x8c6d6e6d, 0xf4601bbd, 3672 + 0xf8000010, 0xbf8a0000, 3673 + 0x846e976e, 0x9177ff77, 3674 + 0x00800000, 0x8c776e77, 3675 + 0xf4603bbd, 0xf8000000, 3676 + 0xbf8a0000, 0xf4603ebd, 3677 + 0xf8000008, 0xbf8a0000, 3678 + 0x8bee6e6e, 0xbfa10001, 3679 + 0xbe80486e, 0x8b6eff6d, 3680 + 0xf0000000, 0xbfa20009, 3681 + 0xb8eef811, 0x8b6eff6e, 3682 + 0x00000080, 0xbfa20007, 3683 + 0x8c78ff78, 0x00004000, 3684 + 0x80ec886c, 0x82ed806d, 3685 + 
0xbfa00002, 0x806c846c, 3686 + 0x826d806d, 0x8b6dff6d, 3687 + 0x0000ffff, 0x8bfe7e7e, 3688 + 0x8bea6a6a, 0x85788978, 3689 + 0x936eff77, 0x0002001a, 3690 + 0xb96ef81a, 0xb9783244, 3691 3691 0xbe804a6c, 0xb8faf802, 3692 3692 0xbf0d987a, 0xbfa10001, 3693 3693 0xbfb00000, 0x8b6dff6d, ··· 3989 3981 0x008ce800, 0x00000000, 3990 3982 0x807d817d, 0x8070ff70, 3991 3983 0x00000080, 0xbf0a7b7d, 3992 - 0xbfa2fff7, 0xbfa0016e, 3984 + 0xbfa2fff7, 0xbfa00171, 3993 3985 0xbef4007e, 0x8b75ff7f, 3994 3986 0x0000ffff, 0x8c75ff75, 3995 3987 0x00040000, 0xbef60080, ··· 4171 4163 0xf8000074, 0xbf8a0000, 4172 4164 0x8b6dff6d, 0x0000ffff, 4173 4165 0x8bfe7e7e, 0x8bea6a6a, 4174 - 0xb97af804, 0xbe804ec2, 4175 - 0xbf94fffe, 0xbe804a6c, 4166 + 0x936eff77, 0x0002001a, 4167 + 0xb96ef81a, 0xb97af804, 4176 4168 0xbe804ec2, 0xbf94fffe, 4177 - 0xbfb10000, 0xbf9f0000, 4169 + 0xbe804a6c, 0xbe804ec2, 4170 + 0xbf94fffe, 0xbfb10000, 4178 4171 0xbf9f0000, 0xbf9f0000, 4179 4172 0xbf9f0000, 0xbf9f0000, 4173 + 0xbf9f0000, 0x00000000, 4180 4174 }; 4181 4175 4182 4176 static const uint32_t cwsr_trap_gfx9_5_0_hex[] = {
+37
drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
··· 78 78 var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT 79 79 var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT 80 80 var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT 81 + 82 + var SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT = 0 83 + var SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE = 2 84 + 81 85 var BARRIER_STATE_SIGNAL_OFFSET = 16 82 86 var BARRIER_STATE_VALID_OFFSET = 0 83 87 88 + var TTMP11_SCHED_MODE_SHIFT = 26 89 + var TTMP11_SCHED_MODE_SIZE = 2 90 + var TTMP11_SCHED_MODE_MASK = 0xC000000 84 91 var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23 85 92 var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000 86 93 ··· 167 160 s_branch L_RESTORE 168 161 169 162 L_SKIP_RESTORE: 163 + // Assume most relaxed scheduling mode is set. Save and revert to normal mode. 164 + s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_SCHED_MODE) 165 + s_wait_alu 0 166 + s_setreg_imm32_b32 hwreg(HW_REG_WAVE_SCHED_MODE, \ 167 + SQ_WAVE_SCHED_MODE_DEP_MODE_SHIFT, SQ_WAVE_SCHED_MODE_DEP_MODE_SIZE), 0 168 + 170 169 s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC 170 + 171 + // Save SCHED_MODE[1:0] into ttmp11[27:26]. 172 + s_andn2_b32 ttmp11, ttmp11, TTMP11_SCHED_MODE_MASK 173 + s_lshl_b32 ttmp2, ttmp2, TTMP11_SCHED_MODE_SHIFT 174 + s_or_b32 ttmp11, ttmp11, ttmp2 171 175 172 176 // Clear SPI_PRIO: do not save with elevated priority. 173 177 // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd. ··· 256 238 s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA 257 239 s_or_b32 ttmp15, ttmp15, 0xFFFF0000 258 240 L_NO_SIGN_EXTEND_TMA: 241 + #if ASIC_FAMILY == CHIP_GFX12 242 + // Move SCHED_MODE[1:0] from ttmp11 to unused bits in ttmp1[27:26] (return PC_HI). 243 + // The second-level trap will restore from ttmp1 for backwards compatibility. 
244 + s_and_b32 ttmp2, ttmp11, TTMP11_SCHED_MODE_MASK 245 + s_andn2_b32 ttmp1, ttmp1, TTMP11_SCHED_MODE_MASK 246 + s_or_b32 ttmp1, ttmp1, ttmp2 247 + #endif 259 248 260 249 s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag 261 250 s_wait_idle ··· 312 287 // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it. 313 288 // Only restore fields which the trap handler changes. 314 289 s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT 290 + 291 + // Assume relaxed scheduling mode after this point. 292 + restore_sched_mode(ttmp2) 293 + 315 294 s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \ 316 295 SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv 317 296 ··· 1072 1043 s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 1073 1044 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 1074 1045 1046 + // Assume relaxed scheduling mode after this point. 1047 + restore_sched_mode(s_restore_tmp) 1048 + 1075 1049 s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu 1076 1050 1077 1051 // Make barrier and LDS state visible to all waves in the group. ··· 1165 1133 ds_nop 1166 1134 end 1167 1135 #endif 1136 + end 1137 + 1138 + function restore_sched_mode(s_tmp) 1139 + s_bfe_u32 s_tmp, ttmp11, (TTMP11_SCHED_MODE_SHIFT | (TTMP11_SCHED_MODE_SIZE << 0x10)) 1140 + s_setreg_b32 hwreg(HW_REG_WAVE_SCHED_MODE), s_tmp 1168 1141 end