Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd: Add detailed GFXOFF stats to debugfs

Add debugfs interface to log GFXOFF statistics:

- Read amdgpu_gfxoff_count to get the total GFXOFF entry count at the
time of query since system power-up

- Write 1 to amdgpu_gfxoff_residency to start logging, and 0 to stop.
Read it to get the average GFXOFF residency percentage, multiplied by
100, over the last logging interval.

Both features are designed to keep their values persistent across
suspends.

Signed-off-by: André Almeida <andrealmeid@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

André Almeida and committed by
Alex Deucher
0ad7347a 30f2d2e4

+321
+168
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
··· 1043 1043 } 1044 1044 1045 1045 /** 1046 + * amdgpu_debugfs_gfxoff_residency_read - Read GFXOFF residency 1047 + * 1048 + * @f: open file handle 1049 + * @buf: User buffer to store read data in 1050 + * @size: Number of bytes to read 1051 + * @pos: Offset to seek to 1052 + * 1053 + * Read the last residency value logged. It doesn't auto update, one needs to 1054 + * stop logging before getting the current value. 1055 + */ 1056 + static ssize_t amdgpu_debugfs_gfxoff_residency_read(struct file *f, char __user *buf, 1057 + size_t size, loff_t *pos) 1058 + { 1059 + struct amdgpu_device *adev = file_inode(f)->i_private; 1060 + ssize_t result = 0; 1061 + int r; 1062 + 1063 + if (size & 0x3 || *pos & 0x3) 1064 + return -EINVAL; 1065 + 1066 + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); 1067 + if (r < 0) { 1068 + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 1069 + return r; 1070 + } 1071 + 1072 + while (size) { 1073 + uint32_t value; 1074 + 1075 + r = amdgpu_get_gfx_off_residency(adev, &value); 1076 + if (r) 1077 + goto out; 1078 + 1079 + r = put_user(value, (uint32_t *)buf); 1080 + if (r) 1081 + goto out; 1082 + 1083 + result += 4; 1084 + buf += 4; 1085 + *pos += 4; 1086 + size -= 4; 1087 + } 1088 + 1089 + r = result; 1090 + out: 1091 + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); 1092 + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 1093 + 1094 + return r; 1095 + } 1096 + 1097 + /** 1098 + * amdgpu_debugfs_gfxoff_residency_write - Log GFXOFF Residency 1099 + * 1100 + * @f: open file handle 1101 + * @buf: User buffer to write data from 1102 + * @size: Number of bytes to write 1103 + * @pos: Offset to seek to 1104 + * 1105 + * Write a 32-bit non-zero to start logging; write a 32-bit zero to stop 1106 + */ 1107 + static ssize_t amdgpu_debugfs_gfxoff_residency_write(struct file *f, const char __user *buf, 1108 + size_t size, loff_t *pos) 1109 + { 1110 + struct amdgpu_device *adev = file_inode(f)->i_private; 1111 + ssize_t result = 0; 1112 + int r; 
1113 + 1114 + if (size & 0x3 || *pos & 0x3) 1115 + return -EINVAL; 1116 + 1117 + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); 1118 + if (r < 0) { 1119 + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 1120 + return r; 1121 + } 1122 + 1123 + while (size) { 1124 + u32 value; 1125 + 1126 + r = get_user(value, (uint32_t *)buf); 1127 + if (r) 1128 + goto out; 1129 + 1130 + amdgpu_set_gfx_off_residency(adev, value ? true : false); 1131 + 1132 + result += 4; 1133 + buf += 4; 1134 + *pos += 4; 1135 + size -= 4; 1136 + } 1137 + 1138 + r = result; 1139 + out: 1140 + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); 1141 + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 1142 + 1143 + return r; 1144 + } 1145 + 1146 + 1147 + /** 1148 + * amdgpu_debugfs_gfxoff_count_read - Read GFXOFF entry count 1149 + * 1150 + * @f: open file handle 1151 + * @buf: User buffer to store read data in 1152 + * @size: Number of bytes to read 1153 + * @pos: Offset to seek to 1154 + */ 1155 + static ssize_t amdgpu_debugfs_gfxoff_count_read(struct file *f, char __user *buf, 1156 + size_t size, loff_t *pos) 1157 + { 1158 + struct amdgpu_device *adev = file_inode(f)->i_private; 1159 + ssize_t result = 0; 1160 + int r; 1161 + 1162 + if (size & 0x3 || *pos & 0x3) 1163 + return -EINVAL; 1164 + 1165 + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); 1166 + if (r < 0) { 1167 + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 1168 + return r; 1169 + } 1170 + 1171 + while (size) { 1172 + u64 value = 0; 1173 + 1174 + r = amdgpu_get_gfx_off_entrycount(adev, &value); 1175 + if (r) 1176 + goto out; 1177 + 1178 + r = put_user(value, (u64 *)buf); 1179 + if (r) 1180 + goto out; 1181 + 1182 + result += 4; 1183 + buf += 4; 1184 + *pos += 4; 1185 + size -= 4; 1186 + } 1187 + 1188 + r = result; 1189 + out: 1190 + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); 1191 + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); 1192 + 1193 + return r; 1194 + } 1195 + 1196 + /** 1046 1197 * 
amdgpu_debugfs_gfxoff_write - Enable/disable GFXOFF 1047 1198 * 1048 1199 * @f: open file handle ··· 1400 1249 .llseek = default_llseek 1401 1250 }; 1402 1251 1252 + static const struct file_operations amdgpu_debugfs_gfxoff_count_fops = { 1253 + .owner = THIS_MODULE, 1254 + .read = amdgpu_debugfs_gfxoff_count_read, 1255 + .llseek = default_llseek 1256 + }; 1257 + 1258 + static const struct file_operations amdgpu_debugfs_gfxoff_residency_fops = { 1259 + .owner = THIS_MODULE, 1260 + .read = amdgpu_debugfs_gfxoff_residency_read, 1261 + .write = amdgpu_debugfs_gfxoff_residency_write, 1262 + .llseek = default_llseek 1263 + }; 1264 + 1403 1265 static const struct file_operations *debugfs_regs[] = { 1404 1266 &amdgpu_debugfs_regs_fops, 1405 1267 &amdgpu_debugfs_regs2_fops, ··· 1425 1261 &amdgpu_debugfs_gpr_fops, 1426 1262 &amdgpu_debugfs_gfxoff_fops, 1427 1263 &amdgpu_debugfs_gfxoff_status_fops, 1264 + &amdgpu_debugfs_gfxoff_count_fops, 1265 + &amdgpu_debugfs_gfxoff_residency_fops, 1428 1266 }; 1429 1267 1430 1268 static const char *debugfs_regs_names[] = { ··· 1441 1275 "amdgpu_gpr", 1442 1276 "amdgpu_gfxoff", 1443 1277 "amdgpu_gfxoff_status", 1278 + "amdgpu_gfxoff_count", 1279 + "amdgpu_gfxoff_residency", 1444 1280 }; 1445 1281 1446 1282 /**
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
··· 3577 3577 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); 3578 3578 3579 3579 adev->gfx.gfx_off_req_count = 1; 3580 + adev->gfx.gfx_off_residency = 0; 3581 + adev->gfx.gfx_off_entrycount = 0; 3580 3582 adev->pm.ac_power = power_supply_is_system_supplied() > 0; 3581 3583 3582 3584 atomic_set(&adev->throttling_logging_enabled, 1);
+39
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 610 610 mutex_unlock(&adev->gfx.gfx_off_mutex); 611 611 } 612 612 613 + int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) 614 + { 615 + int r = 0; 616 + 617 + mutex_lock(&adev->gfx.gfx_off_mutex); 618 + 619 + r = amdgpu_dpm_set_residency_gfxoff(adev, value); 620 + 621 + mutex_unlock(&adev->gfx.gfx_off_mutex); 622 + 623 + return r; 624 + } 625 + 626 + int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *value) 627 + { 628 + int r = 0; 629 + 630 + mutex_lock(&adev->gfx.gfx_off_mutex); 631 + 632 + r = amdgpu_dpm_get_residency_gfxoff(adev, value); 633 + 634 + mutex_unlock(&adev->gfx.gfx_off_mutex); 635 + 636 + return r; 637 + } 638 + 639 + int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value) 640 + { 641 + int r = 0; 642 + 643 + mutex_lock(&adev->gfx.gfx_off_mutex); 644 + 645 + r = amdgpu_dpm_get_entrycount_gfxoff(adev, value); 646 + 647 + mutex_unlock(&adev->gfx.gfx_off_mutex); 648 + 649 + return r; 650 + } 651 + 613 652 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) 614 653 { 615 654
+6
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
··· 336 336 struct mutex gfx_off_mutex; 337 337 uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ 338 338 struct delayed_work gfx_off_delay_work; 339 + uint32_t gfx_off_residency; 340 + uint64_t gfx_off_entrycount; 339 341 340 342 /* pipe reservation */ 341 343 struct mutex pipe_reserve_mutex; ··· 409 407 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); 410 408 int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); 411 409 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); 410 + void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); 411 + int amdgpu_get_gfx_off_entrycount(struct amdgpu_device *adev, u64 *value); 412 + int amdgpu_get_gfx_off_residency(struct amdgpu_device *adev, u32 *residency); 413 + int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value); 412 414 int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, 413 415 void *err_data, 414 416 struct amdgpu_iv_entry *entry);
+45
drivers/gpu/drm/amd/pm/amdgpu_dpm.c
··· 668 668 return ret; 669 669 } 670 670 671 + int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value) 672 + { 673 + struct smu_context *smu = adev->powerplay.pp_handle; 674 + int ret = 0; 675 + 676 + if (!is_support_sw_smu(adev)) 677 + return -EOPNOTSUPP; 678 + 679 + mutex_lock(&adev->pm.mutex); 680 + ret = smu_set_residency_gfxoff(smu, value); 681 + mutex_unlock(&adev->pm.mutex); 682 + 683 + return ret; 684 + } 685 + 686 + int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value) 687 + { 688 + struct smu_context *smu = adev->powerplay.pp_handle; 689 + int ret = 0; 690 + 691 + if (!is_support_sw_smu(adev)) 692 + return -EOPNOTSUPP; 693 + 694 + mutex_lock(&adev->pm.mutex); 695 + ret = smu_get_residency_gfxoff(smu, value); 696 + mutex_unlock(&adev->pm.mutex); 697 + 698 + return ret; 699 + } 700 + 701 + int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value) 702 + { 703 + struct smu_context *smu = adev->powerplay.pp_handle; 704 + int ret = 0; 705 + 706 + if (!is_support_sw_smu(adev)) 707 + return -EOPNOTSUPP; 708 + 709 + mutex_lock(&adev->pm.mutex); 710 + ret = smu_get_entrycount_gfxoff(smu, value); 711 + mutex_unlock(&adev->pm.mutex); 712 + 713 + return ret; 714 + } 715 + 671 716 int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value) 672 717 { 673 718 struct smu_context *smu = adev->powerplay.pp_handle;
+3
drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
··· 435 435 int amdgpu_dpm_write_watermarks_table(struct amdgpu_device *adev); 436 436 int amdgpu_dpm_wait_for_event(struct amdgpu_device *adev, enum smu_event_type event, 437 437 uint64_t event_arg); 438 + int amdgpu_dpm_get_residency_gfxoff(struct amdgpu_device *adev, u32 *value); 439 + int amdgpu_dpm_set_residency_gfxoff(struct amdgpu_device *adev, bool value); 440 + int amdgpu_dpm_get_entrycount_gfxoff(struct amdgpu_device *adev, u64 *value); 438 441 int amdgpu_dpm_get_status_gfxoff(struct amdgpu_device *adev, uint32_t *value); 439 442 uint64_t amdgpu_dpm_get_thermal_throttling_counter(struct amdgpu_device *adev); 440 443 void amdgpu_dpm_gfx_state_change(struct amdgpu_device *adev,
+33
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
··· 90 90 return smu_set_pp_feature_mask(smu, new_mask); 91 91 } 92 92 93 + int smu_set_residency_gfxoff(struct smu_context *smu, bool value) 94 + { 95 + if (!smu->ppt_funcs->set_gfx_off_residency) 96 + return -EINVAL; 97 + 98 + return smu_set_gfx_off_residency(smu, value); 99 + } 100 + 101 + int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value) 102 + { 103 + if (!smu->ppt_funcs->get_gfx_off_residency) 104 + return -EINVAL; 105 + 106 + return smu_get_gfx_off_residency(smu, value); 107 + } 108 + 109 + int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value) 110 + { 111 + if (!smu->ppt_funcs->get_gfx_off_entrycount) 112 + return -EINVAL; 113 + 114 + return smu_get_gfx_off_entrycount(smu, value); 115 + } 116 + 93 117 int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value) 94 118 { 95 119 if (!smu->ppt_funcs->get_gfx_off_status) ··· 1600 1576 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1601 1577 struct smu_context *smu = adev->powerplay.pp_handle; 1602 1578 int ret; 1579 + uint64_t count; 1603 1580 1604 1581 if (amdgpu_sriov_vf(adev)&& !amdgpu_sriov_is_pp_one_vf(adev)) 1605 1582 return 0; ··· 1617 1592 smu->watermarks_bitmap &= ~(WATERMARKS_LOADED); 1618 1593 1619 1594 smu_set_gfx_cgpg(smu, false); 1595 + 1596 + /* 1597 + * pwfw resets entrycount when device is suspended, so we save the 1598 + * last value to be used when we resume to keep it consistent 1599 + */ 1600 + ret = smu_get_entrycount_gfxoff(smu, &count); 1601 + if (!ret) 1602 + adev->gfx.gfx_off_entrycount = count; 1620 1603 1621 1604 return 0; 1622 1605 }
+22
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
··· 1112 1112 uint32_t (*get_gfx_off_status)(struct smu_context *smu); 1113 1113 1114 1114 /** 1115 + * @gfx_off_entrycount: total GFXOFF entry count at the time of 1116 + * query since system power-up 1117 + */ 1118 + u32 (*get_gfx_off_entrycount)(struct smu_context *smu, uint64_t *entrycount); 1119 + 1120 + /** 1121 + * @set_gfx_off_residency: set 1 to start logging, 0 to stop logging 1122 + */ 1123 + u32 (*set_gfx_off_residency)(struct smu_context *smu, bool start); 1124 + 1125 + /** 1126 + * @get_gfx_off_residency: Average GFXOFF residency % during the logging interval 1127 + */ 1128 + u32 (*get_gfx_off_residency)(struct smu_context *smu, uint32_t *residency); 1129 + 1130 + /** 1115 1131 * @register_irq_handler: Register interupt request handlers. 1116 1132 */ 1117 1133 int (*register_irq_handler)(struct smu_context *smu); ··· 1469 1453 int smu_set_ac_dc(struct smu_context *smu); 1470 1454 1471 1455 int smu_allow_xgmi_power_down(struct smu_context *smu, bool en); 1456 + 1457 + int smu_get_entrycount_gfxoff(struct smu_context *smu, u64 *value); 1458 + 1459 + int smu_get_residency_gfxoff(struct smu_context *smu, u32 *value); 1460 + 1461 + int smu_set_residency_gfxoff(struct smu_context *smu, bool value); 1472 1462 1473 1463 int smu_get_status_gfxoff(struct smu_context *smu, uint32_t *value); 1474 1464
+3
drivers/gpu/drm/amd/pm/swsmu/smu_internal.h
··· 47 47 #define smu_notify_memory_pool_location(smu) smu_ppt_funcs(notify_memory_pool_location, 0, smu) 48 48 #define smu_gfx_off_control(smu, enable) smu_ppt_funcs(gfx_off_control, 0, smu, enable) 49 49 #define smu_get_gfx_off_status(smu) smu_ppt_funcs(get_gfx_off_status, 0, smu) 50 + #define smu_get_gfx_off_entrycount(smu, value) smu_ppt_funcs(get_gfx_off_entrycount, 0, smu, value) 51 + #define smu_get_gfx_off_residency(smu, value) smu_ppt_funcs(get_gfx_off_residency, 0, smu, value) 52 + #define smu_set_gfx_off_residency(smu, value) smu_ppt_funcs(set_gfx_off_residency, 0, smu, value) 50 53 #define smu_set_last_dcef_min_deep_sleep_clk(smu) smu_ppt_funcs(set_last_dcef_min_deep_sleep_clk, 0, smu) 51 54 #define smu_system_features_control(smu, en) smu_ppt_funcs(system_features_control, 0, smu, en) 52 55 #define smu_init_max_sustainable_clocks(smu) smu_ppt_funcs(init_max_sustainable_clocks, 0, smu)