Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdkfd: Update SMI throttle event bitmask

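Update the Arcturus/Aldebaran thermal throttle SMI event path to use
ASIC-independent throttler bits when logging. Because the independent
throttler status is a 64-bit value, the throttle bitmask parameter is
widened from uint32_t to uint64_t, the event is formatted with %llx,
and the message buffer grows from 29 to 37 bytes.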

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Graham Sider and committed by Alex Deucher
410e302e e25515e2

+15 -11
+2 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
··· 332 332 int kgd2kfd_post_reset(struct kfd_dev *kfd); 333 333 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); 334 334 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); 335 - void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); 335 + void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask); 336 336 #else 337 337 static inline int kgd2kfd_init(void) 338 338 { ··· 391 391 } 392 392 393 393 static inline 394 - void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) 394 + void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) 395 395 { 396 396 } 397 397 #endif
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_device.c
··· 1369 1369 WARN_ONCE(count < 0, "Compute profile ref. count error"); 1370 1370 } 1371 1371 1372 - void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) 1372 + void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask) 1373 1373 { 1374 1374 if (kfd && kfd->init_complete) 1375 1375 kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
+5 -5
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
··· 205 205 } 206 206 207 207 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, 208 - uint32_t throttle_bitmask) 208 + uint64_t throttle_bitmask) 209 209 { 210 210 struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd; 211 211 /* 212 212 * ThermalThrottle msg = throttle_bitmask(8): 213 213 * thermal_interrupt_count(16): 214 - * 1 byte event + 1 byte space + 8 byte throttle_bitmask + 214 + * 1 byte event + 1 byte space + 16 byte throttle_bitmask + 215 215 * 1 byte : + 16 byte thermal_interupt_counter + 1 byte \n + 216 - * 1 byte \0 = 29 216 + * 1 byte \0 = 37 217 217 */ 218 - char fifo_in[29]; 218 + char fifo_in[37]; 219 219 int len; 220 220 221 221 if (list_empty(&dev->smi_clients)) 222 222 return; 223 223 224 - len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%llx\n", 224 + len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n", 225 225 KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, 226 226 atomic64_read(&adev->smu.throttle_int_counter)); 227 227
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h
··· 26 26 int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd); 27 27 void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid); 28 28 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev, 29 - uint32_t throttle_bitmask); 29 + uint64_t throttle_bitmask); 30 30 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset); 31 31 32 32 #endif
+3 -1
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
··· 2178 2178 2179 2179 dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n", 2180 2180 log_buf); 2181 - kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status); 2181 + kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, 2182 + smu_cmn_get_indep_throttler_status(throttler_status, 2183 + arcturus_throttler_map)); 2182 2184 } 2183 2185 2184 2186 static uint16_t arcturus_get_current_pcie_link_speed(struct smu_context *smu)
+3 -1
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
··· 1653 1653 1654 1654 dev_warn(adev->dev, "WARN: GPU thermal throttling temperature reached, expect performance decrease. %s.\n", 1655 1655 log_buf); 1656 - kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, throttler_status); 1656 + kgd2kfd_smi_event_throttle(smu->adev->kfd.dev, 1657 + smu_cmn_get_indep_throttler_status(throttler_status, 1658 + aldebaran_throttler_map)); 1657 1659 } 1658 1660 1659 1661 static int aldebaran_get_current_pcie_link_speed(struct smu_context *smu)