Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: add kfd2kgd debugger callbacks for GC v9.4.3

Implement the callbacks in the same way as for GC v9.4.2, while
accounting for the differences in the GC v9.4.3 HW spec, i.e. the
per-xcc-instance handling.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Eric Huang and committed by
Alex Deucher
036e348f 9df88c81

+241 -25
+5 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
··· 23 23 #include "amdgpu_amdkfd.h" 24 24 #include "amdgpu_amdkfd_arcturus.h" 25 25 #include "amdgpu_amdkfd_gfx_v9.h" 26 + #include "amdgpu_amdkfd_aldebaran.h" 26 27 #include "gc/gc_9_4_2_offset.h" 27 28 #include "gc/gc_9_4_2_sh_mask.h" 28 29 #include <uapi/linux/kfd_ioctl.h> ··· 37 36 * initialize the debug mode registers after it has disabled GFX off during the 38 37 * debug session. 39 38 */ 40 - static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev, 39 + uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev, 41 40 bool restore_dbg_registers, 42 41 uint32_t vmid) 43 42 { ··· 108 107 return data; 109 108 } 110 109 111 - static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev, 110 + uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev, 112 111 uint8_t wave_launch_mode, 113 112 uint32_t vmid) 114 113 { ··· 126 125 uint32_t watch_address_mask, 127 126 uint32_t watch_id, 128 127 uint32_t watch_mode, 129 - uint32_t debug_vmid) 128 + uint32_t debug_vmid, 129 + uint32_t inst) 130 130 { 131 131 uint32_t watch_address_high; 132 132 uint32_t watch_address_low;
+27
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h
··· 1 + /* 2 + * Copyright 2023 Advanced Micro Devices, Inc. 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice shall be included in 12 + * all copies or substantial portions of the Software. 13 + * 14 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 + * OTHER DEALINGS IN THE SOFTWARE. 21 + */ 22 + uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev, 23 + bool restore_dbg_registers, 24 + uint32_t vmid); 25 + uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev, 26 + uint8_t wave_launch_mode, 27 + uint32_t vmid);
+165 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
··· 22 22 #include "amdgpu.h" 23 23 #include "amdgpu_amdkfd.h" 24 24 #include "amdgpu_amdkfd_gfx_v9.h" 25 + #include "amdgpu_amdkfd_aldebaran.h" 25 26 #include "gc/gc_9_4_3_offset.h" 26 27 #include "gc/gc_9_4_3_sh_mask.h" 27 28 #include "athub/athub_1_8_0_offset.h" ··· 33 32 #include "soc15.h" 34 33 #include "sdma/sdma_4_4_2_offset.h" 35 34 #include "sdma/sdma_4_4_2_sh_mask.h" 35 + #include <uapi/linux/kfd_ioctl.h> 36 36 37 37 static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) 38 38 { ··· 363 361 return 0; 364 362 } 365 363 364 + /* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ 365 + static uint32_t kgd_gfx_v9_4_3_disable_debug_trap(struct amdgpu_device *adev, 366 + bool keep_trap_enabled, 367 + uint32_t vmid) 368 + { 369 + uint32_t data = 0; 370 + 371 + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 372 + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); 373 + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); 374 + 375 + return data; 376 + } 377 + 378 + static int kgd_gfx_v9_4_3_validate_trap_override_request( 379 + struct amdgpu_device *adev, 380 + uint32_t trap_override, 381 + uint32_t *trap_mask_supported) 382 + { 383 + *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID | 384 + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | 385 + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | 386 + KFD_DBG_TRAP_MASK_FP_OVERFLOW | 387 + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | 388 + KFD_DBG_TRAP_MASK_FP_INEXACT | 389 + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | 390 + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | 391 + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION | 392 + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START | 393 + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; 394 + 395 + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR && 396 + trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE) 397 + return -EPERM; 398 + 399 + return 0; 400 + } 401 + 402 + static uint32_t trap_mask_map_sw_to_hw(uint32_t mask) 403 + { 404 + uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 
1 : 0; 405 + uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0; 406 + uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID | 407 + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | 408 + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | 409 + KFD_DBG_TRAP_MASK_FP_OVERFLOW | 410 + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | 411 + KFD_DBG_TRAP_MASK_FP_INEXACT | 412 + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | 413 + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | 414 + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION); 415 + uint32_t ret; 416 + 417 + ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en); 418 + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start); 419 + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end); 420 + 421 + return ret; 422 + } 423 + 424 + static uint32_t trap_mask_map_hw_to_sw(uint32_t mask) 425 + { 426 + uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN); 427 + 428 + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START)) 429 + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START; 430 + 431 + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END)) 432 + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; 433 + 434 + return ret; 435 + } 436 + 437 + /* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. 
*/ 438 + static uint32_t kgd_gfx_v9_4_3_set_wave_launch_trap_override( 439 + struct amdgpu_device *adev, 440 + uint32_t vmid, 441 + uint32_t trap_override, 442 + uint32_t trap_mask_bits, 443 + uint32_t trap_mask_request, 444 + uint32_t *trap_mask_prev, 445 + uint32_t kfd_dbg_trap_cntl_prev) 446 + 447 + { 448 + uint32_t data = 0; 449 + 450 + *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev); 451 + 452 + data = (trap_mask_bits & trap_mask_request) | 453 + (*trap_mask_prev & ~trap_mask_request); 454 + data = trap_mask_map_sw_to_hw(data); 455 + 456 + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); 457 + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override); 458 + 459 + return data; 460 + } 461 + 462 + #define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H) 463 + static uint32_t kgd_gfx_v9_4_3_set_address_watch( 464 + struct amdgpu_device *adev, 465 + uint64_t watch_address, 466 + uint32_t watch_address_mask, 467 + uint32_t watch_id, 468 + uint32_t watch_mode, 469 + uint32_t debug_vmid, 470 + uint32_t inst) 471 + { 472 + uint32_t watch_address_high; 473 + uint32_t watch_address_low; 474 + uint32_t watch_address_cntl; 475 + 476 + watch_address_cntl = 0; 477 + watch_address_low = lower_32_bits(watch_address); 478 + watch_address_high = upper_32_bits(watch_address) & 0xffff; 479 + 480 + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, 481 + TCP_WATCH0_CNTL, 482 + MODE, 483 + watch_mode); 484 + 485 + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, 486 + TCP_WATCH0_CNTL, 487 + MASK, 488 + watch_address_mask >> 7); 489 + 490 + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, 491 + TCP_WATCH0_CNTL, 492 + VALID, 493 + 1); 494 + 495 + WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 496 + regTCP_WATCH0_ADDR_H) + 497 + (watch_id * TCP_WATCH_STRIDE)), 498 + watch_address_high); 499 + 500 + WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 501 + regTCP_WATCH0_ADDR_L) + 502 + (watch_id 
* TCP_WATCH_STRIDE)), 503 + watch_address_low); 504 + 505 + return watch_address_cntl; 506 + } 507 + 508 + static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev, 509 + uint32_t watch_id) 510 + { 511 + return 0; 512 + } 513 + 366 514 const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { 367 515 .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, 368 516 .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, ··· 533 381 kgd_gfx_v9_set_vm_context_page_table_base, 534 382 .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, 535 383 .program_trap_handler_settings = 536 - kgd_gfx_v9_program_trap_handler_settings 384 + kgd_gfx_v9_program_trap_handler_settings, 385 + .build_grace_period_packet_info = 386 + kgd_gfx_v9_build_grace_period_packet_info, 387 + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, 388 + .enable_debug_trap = kgd_aldebaran_enable_debug_trap, 389 + .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap, 390 + .validate_trap_override_request = 391 + kgd_gfx_v9_4_3_validate_trap_override_request, 392 + .set_wave_launch_trap_override = 393 + kgd_gfx_v9_4_3_set_wave_launch_trap_override, 394 + .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, 395 + .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, 396 + .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch 537 397 };
+6 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
··· 886 886 uint32_t watch_address_mask, 887 887 uint32_t watch_id, 888 888 uint32_t watch_mode, 889 - uint32_t debug_vmid) 889 + uint32_t debug_vmid, 890 + uint32_t inst) 890 891 { 891 892 uint32_t watch_address_high; 892 893 uint32_t watch_address_low; ··· 969 968 * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 970 969 */ 971 970 void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, 972 - uint32_t *wait_times) 971 + uint32_t *wait_times, 972 + uint32_t inst) 973 973 974 974 { 975 975 *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); ··· 980 978 uint32_t wait_times, 981 979 uint32_t grace_period, 982 980 uint32_t *reg_offset, 983 - uint32_t *reg_data) 981 + uint32_t *reg_data, 982 + uint32_t inst) 984 983 { 985 984 *reg_data = wait_times; 986 985
+7 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
··· 44 44 uint32_t watch_address_mask, 45 45 uint32_t watch_id, 46 46 uint32_t watch_mode, 47 - uint32_t debug_vmid); 47 + uint32_t debug_vmid, 48 + uint32_t inst); 48 49 uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, 49 50 uint32_t watch_id); 50 - void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); 51 + void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, 52 + uint32_t *wait_times, 53 + uint32_t inst); 51 54 void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, 52 55 uint32_t wait_times, 53 56 uint32_t grace_period, 54 57 uint32_t *reg_offset, 55 - uint32_t *reg_data); 58 + uint32_t *reg_data, 59 + uint32_t inst);
+2 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
··· 743 743 uint32_t watch_address_mask, 744 744 uint32_t watch_id, 745 745 uint32_t watch_mode, 746 - uint32_t debug_vmid) 746 + uint32_t debug_vmid, 747 + uint32_t inst) 747 748 { 748 749 uint32_t watch_address_high; 749 750 uint32_t watch_address_low;
+10 -5
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
··· 822 822 uint32_t watch_address_mask, 823 823 uint32_t watch_id, 824 824 uint32_t watch_mode, 825 - uint32_t debug_vmid) 825 + uint32_t debug_vmid, 826 + uint32_t inst) 826 827 { 827 828 uint32_t watch_address_high; 828 829 uint32_t watch_address_low; ··· 904 903 * deq_retry_wait_time -- Wait Count for Global Wave Syncs. 905 904 */ 906 905 void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, 907 - uint32_t *wait_times) 906 + uint32_t *wait_times, 907 + uint32_t inst) 908 908 909 909 { 910 - *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); 910 + *wait_times = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 911 + mmCP_IQ_WAIT_TIME2)); 911 912 } 912 913 913 914 void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, ··· 1103 1100 uint32_t wait_times, 1104 1101 uint32_t grace_period, 1105 1102 uint32_t *reg_offset, 1106 - uint32_t *reg_data) 1103 + uint32_t *reg_data, 1104 + uint32_t inst) 1107 1105 { 1108 1106 *reg_data = wait_times; 1109 1107 ··· 1120 1116 SCH_WAVE, 1121 1117 grace_period); 1122 1118 1123 - *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); 1119 + *reg_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), 1120 + mmCP_IQ_WAIT_TIME2); 1124 1121 } 1125 1122 1126 1123 void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
+7 -3
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
··· 89 89 uint32_t watch_address_mask, 90 90 uint32_t watch_id, 91 91 uint32_t watch_mode, 92 - uint32_t debug_vmid); 92 + uint32_t debug_vmid, 93 + uint32_t inst); 93 94 uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, 94 95 uint32_t watch_id); 95 - void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); 96 + void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, 97 + uint32_t *wait_times, 98 + uint32_t inst); 96 99 void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, 97 100 uint32_t wait_times, 98 101 uint32_t grace_period, 99 102 uint32_t *reg_offset, 100 - uint32_t *reg_data); 103 + uint32_t *reg_data, 104 + uint32_t inst);
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_debug.c
··· 466 466 watch_address_mask, 467 467 *watch_id, 468 468 watch_mode, 469 - pdd->dev->vm_info.last_vmid_kfd); 469 + pdd->dev->vm_info.last_vmid_kfd, 470 + 0); 470 471 amdgpu_gfx_off_ctrl(pdd->dev->adev, true); 471 472 472 473 if (!pdd->dev->kfd->shared_resources.enable_mes)
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
··· 1621 1621 1622 1622 if (dqm->dev->kfd2kgd->get_iq_wait_times) 1623 1623 dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev, 1624 - &dqm->wait_times); 1624 + &dqm->wait_times, 1625 + 0); 1625 1626 return 0; 1626 1627 } 1627 1628
+2 -1
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
··· 298 298 pm->dqm->wait_times, 299 299 grace_period, 300 300 &reg_offset, 301 - &reg_data); 301 + &reg_data, 302 + 0); 302 303 303 304 if (grace_period == USE_DEFAULT_GRACE_PERIOD) 304 305 reg_data = pm->dqm->wait_times;
+6 -3
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 315 315 uint32_t watch_address_mask, 316 316 uint32_t watch_id, 317 317 uint32_t watch_mode, 318 - uint32_t debug_vmid); 318 + uint32_t debug_vmid, 319 + uint32_t inst); 319 320 uint32_t (*clear_address_watch)(struct amdgpu_device *adev, 320 321 uint32_t watch_id); 321 322 void (*get_iq_wait_times)(struct amdgpu_device *adev, 322 - uint32_t *wait_times); 323 + uint32_t *wait_times, 324 + uint32_t inst); 323 325 void (*build_grace_period_packet_info)(struct amdgpu_device *adev, 324 326 uint32_t wait_times, 325 327 uint32_t grace_period, 326 328 uint32_t *reg_offset, 327 - uint32_t *reg_data); 329 + uint32_t *reg_data, 330 + uint32_t inst); 328 331 void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid, 329 332 int *wave_cnt, int *max_waves_per_cu, uint32_t inst); 330 333 void (*program_trap_handler_settings)(struct amdgpu_device *adev,