Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: Reduce dequeue retry timeout for gfx9 family

The dequeue retry timeout (CP_IQ_WAIT_TIME2.QUE_SLEEP) controls the
interval between checks for unmet conditions. On MI series, reduce this
from 0x40 to 0x1 (~1 us). The cost of the additional bandwidth consumed
by the CP when polling memory shouldn't be substantial.

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Harish Kasiviswanathan and committed by
Alex Deucher
8a7820c0 02fc2f3c

+72 -52
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
··· 189 189 .set_address_watch = kgd_gfx_aldebaran_set_address_watch, 190 190 .clear_address_watch = kgd_gfx_v9_clear_address_watch, 191 191 .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, 192 - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, 192 + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, 193 193 .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, 194 194 .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, 195 195 .hqd_reset = kgd_gfx_v9_hqd_reset,
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
··· 415 415 .set_address_watch = kgd_gfx_v9_set_address_watch, 416 416 .clear_address_watch = kgd_gfx_v9_clear_address_watch, 417 417 .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, 418 - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, 418 + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, 419 419 .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, 420 420 .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, 421 421 .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+2 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
··· 541 541 .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, 542 542 .program_trap_handler_settings = 543 543 kgd_gfx_v9_program_trap_handler_settings, 544 - .build_grace_period_packet_info = 545 - kgd_gfx_v9_build_grace_period_packet_info, 544 + .build_dequeue_wait_counts_packet_info = 545 + kgd_gfx_v9_build_dequeue_wait_counts_packet_info, 546 546 .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, 547 547 .enable_debug_trap = kgd_aldebaran_enable_debug_trap, 548 548 .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
+14 -14
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
··· 1021 1021 *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); 1022 1022 } 1023 1023 1024 - void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, 1024 + void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, 1025 1025 uint32_t wait_times, 1026 - uint32_t grace_period, 1026 + uint32_t sch_wave, 1027 + uint32_t que_sleep, 1027 1028 uint32_t *reg_offset, 1028 1029 uint32_t *reg_data) 1029 1030 { 1030 1031 *reg_data = wait_times; 1031 1032 1032 - /* 1033 - * The CP cannont handle a 0 grace period input and will result in 1034 - * an infinite grace period being set so set to 1 to prevent this. 1035 - */ 1036 - if (grace_period == 0) 1037 - grace_period = 1; 1038 - 1039 - *reg_data = REG_SET_FIELD(*reg_data, 1040 - CP_IQ_WAIT_TIME2, 1041 - SCH_WAVE, 1042 - grace_period); 1033 + if (sch_wave) 1034 + *reg_data = REG_SET_FIELD(*reg_data, 1035 + CP_IQ_WAIT_TIME2, 1036 + SCH_WAVE, 1037 + sch_wave); 1038 + if (que_sleep) 1039 + *reg_data = REG_SET_FIELD(*reg_data, 1040 + CP_IQ_WAIT_TIME2, 1041 + QUE_SLEEP, 1042 + que_sleep); 1043 1043 1044 1044 *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); 1045 1045 } ··· 1115 1115 .set_address_watch = kgd_gfx_v10_set_address_watch, 1116 1116 .clear_address_watch = kgd_gfx_v10_clear_address_watch, 1117 1117 .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, 1118 - .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, 1118 + .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info, 1119 1119 .program_trap_handler_settings = program_trap_handler_settings, 1120 1120 .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, 1121 1121 .hqd_reset = kgd_gfx_v10_hqd_reset,
+3 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
··· 51 51 void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, 52 52 uint32_t *wait_times, 53 53 uint32_t inst); 54 - void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, 54 + void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, 55 55 uint32_t wait_times, 56 - uint32_t grace_period, 56 + uint32_t sch_wave, 57 + uint32_t que_sleep, 57 58 uint32_t *reg_offset, 58 59 uint32_t *reg_data); 59 60 uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
··· 673 673 .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, 674 674 .program_trap_handler_settings = program_trap_handler_settings_v10_3, 675 675 .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, 676 - .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, 676 + .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info, 677 677 .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, 678 678 .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, 679 679 .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
+14 -14
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
··· 1077 1077 adev->gfx.cu_info.max_waves_per_simd; 1078 1078 } 1079 1079 1080 - void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, 1080 + void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, 1081 1081 uint32_t wait_times, 1082 - uint32_t grace_period, 1082 + uint32_t sch_wave, 1083 + uint32_t que_sleep, 1083 1084 uint32_t *reg_offset, 1084 1085 uint32_t *reg_data) 1085 1086 { 1086 1087 *reg_data = wait_times; 1087 1088 1088 - /* 1089 - * The CP cannot handle a 0 grace period input and will result in 1090 - * an infinite grace period being set so set to 1 to prevent this. 1091 - */ 1092 - if (grace_period == 0) 1093 - grace_period = 1; 1094 - 1095 - *reg_data = REG_SET_FIELD(*reg_data, 1096 - CP_IQ_WAIT_TIME2, 1097 - SCH_WAVE, 1098 - grace_period); 1089 + if (sch_wave) 1090 + *reg_data = REG_SET_FIELD(*reg_data, 1091 + CP_IQ_WAIT_TIME2, 1092 + SCH_WAVE, 1093 + sch_wave); 1094 + if (que_sleep) 1095 + *reg_data = REG_SET_FIELD(*reg_data, 1096 + CP_IQ_WAIT_TIME2, 1097 + QUE_SLEEP, 1098 + que_sleep); 1099 1099 1100 1100 *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); 1101 1101 } ··· 1255 1255 .set_address_watch = kgd_gfx_v9_set_address_watch, 1256 1256 .clear_address_watch = kgd_gfx_v9_clear_address_watch, 1257 1257 .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, 1258 - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, 1258 + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, 1259 1259 .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, 1260 1260 .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, 1261 1261 .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
+3 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
··· 97 97 void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, 98 98 uint32_t *wait_times, 99 99 uint32_t inst); 100 - void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, 100 + void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, 101 101 uint32_t wait_times, 102 - uint32_t grace_period, 102 + uint32_t sch_wave, 103 + uint32_t que_sleep, 103 104 uint32_t *reg_offset, 104 105 uint32_t *reg_data); 105 106 uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
+30 -13
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
··· 298 298 } 299 299 300 300 static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm, 301 - uint32_t sch_value, uint32_t *reg_offset, 301 + uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset, 302 302 uint32_t *reg_data) 303 303 { 304 - pm->dqm->dev->kfd2kgd->build_grace_period_packet_info( 304 + pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info( 305 305 pm->dqm->dev->adev, 306 306 pm->dqm->wait_times, 307 307 sch_value, 308 + que_sleep, 308 309 reg_offset, 309 310 reg_data); 310 311 } ··· 320 319 uint32_t reg_data = 0; 321 320 322 321 switch (cmd) { 323 - case KFD_DEQUEUE_WAIT_INIT: 324 - /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ 325 - if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && 326 - (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) 327 - pm_build_dequeue_wait_counts_packet_info(pm, 1, &reg_offset, &reg_data); 328 - else 322 + case KFD_DEQUEUE_WAIT_INIT: { 323 + uint32_t sch_wave = 0, que_sleep = 0; 324 + /* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40. 325 + * On a 1GHz machine this is roughly 1 microsecond, which is 326 + * about how long it takes to load data out of memory during 327 + * queue connect 328 + * QUE_SLEEP: Wait Count for Dequeue Retry. 
329 + */ 330 + if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) && 331 + KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) { 332 + que_sleep = 1; 333 + 334 + /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ 335 + if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu && 336 + (KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3))) 337 + sch_wave = 1; 338 + } else { 329 339 return 0; 340 + } 341 + pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep, 342 + &reg_offset, &reg_data); 343 + 330 344 break; 345 + } 331 346 case KFD_DEQUEUE_WAIT_RESET: 332 - /* function called only to get reg_offset */ 333 - pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset, &reg_data); 334 - reg_data = pm->dqm->wait_times; 347 + /* reg_data would be set to dqm->wait_times */ 348 + pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data); 335 349 break; 336 350 337 351 case KFD_DEQUEUE_WAIT_SET_SCH_WAVE: 338 352 /* The CP cannot handle value 0 and it will result in 339 - * an infinite grace period being set so set to 1 to prevent this. 353 + * an infinite grace period being set so set to 1 to prevent this. Also 354 + * avoid debugger API breakage as it sets 0 and expects a low value. 340 355 */ 341 356 if (!value) 342 357 value = 1; 343 - pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset, &reg_data); 358 + pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data); 344 359 break; 345 360 default: 346 361 pr_err("Invalid dequeue wait cmd\n");
+3 -2
drivers/gpu/drm/amd/include/kgd_kfd_interface.h
··· 313 313 void (*get_iq_wait_times)(struct amdgpu_device *adev, 314 314 uint32_t *wait_times, 315 315 uint32_t inst); 316 - void (*build_grace_period_packet_info)(struct amdgpu_device *adev, 316 + void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device *adev, 317 317 uint32_t wait_times, 318 - uint32_t grace_period, 318 + uint32_t sch_wave, 319 + uint32_t que_sleep, 319 320 uint32_t *reg_offset, 320 321 uint32_t *reg_data); 321 322 void (*get_cu_occupancy)(struct amdgpu_device *adev,