Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/powerplay: support temperature emergency max values

These new interfaces (temp1_emergency, temp2_emergency,
temp3_emergency) are supported on SOC15 dGPUs only.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Evan Quan and committed by
Alex Deucher
901cb599 437ccd17

+81 -6
+6
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
··· 75 75 int min_temp; 76 76 /* high temperature threshold */ 77 77 int max_temp; 78 + /* edge max emergency(shutdown) temp */ 79 + int max_edge_emergency_temp; 78 80 /* hotspot low temperature threshold */ 79 81 int min_hotspot_temp; 80 82 /* hotspot high temperature critical threshold */ 81 83 int max_hotspot_crit_temp; 84 + /* hotspot max emergency(shutdown) temp */ 85 + int max_hotspot_emergency_temp; 82 86 /* memory low temperature threshold */ 83 87 int min_mem_temp; 84 88 /* memory high temperature critical threshold */ 85 89 int max_mem_crit_temp; 90 + /* memory max emergency(shutdown) temp */ 91 + int max_mem_emergency_temp; 86 92 /* was last interrupt low to high or high to low */ 87 93 bool high_to_low; 88 94 /* interrupt source */
+39 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
··· 1446 1446 return snprintf(buf, PAGE_SIZE, "%d\n", temp); 1447 1447 } 1448 1448 1449 + static ssize_t amdgpu_hwmon_show_temp_emergency(struct device *dev, 1450 + struct device_attribute *attr, 1451 + char *buf) 1452 + { 1453 + struct amdgpu_device *adev = dev_get_drvdata(dev); 1454 + int channel = to_sensor_dev_attr(attr)->index; 1455 + int temp = 0; 1456 + 1457 + if (channel >= PP_TEMP_MAX) 1458 + return -EINVAL; 1459 + 1460 + switch (channel) { 1461 + case PP_TEMP_JUNCTION: 1462 + temp = adev->pm.dpm.thermal.max_hotspot_emergency_temp; 1463 + break; 1464 + case PP_TEMP_EDGE: 1465 + temp = adev->pm.dpm.thermal.max_edge_emergency_temp; 1466 + break; 1467 + case PP_TEMP_MEM: 1468 + temp = adev->pm.dpm.thermal.max_mem_emergency_temp; 1469 + break; 1470 + } 1471 + 1472 + return snprintf(buf, PAGE_SIZE, "%d\n", temp); 1473 + } 1474 + 1449 1475 static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, 1450 1476 struct device_attribute *attr, 1451 1477 char *buf) ··· 2049 2023 * - temp[1-3]_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius 2050 2024 * - temp2_crit_hyst and temp3_crit_hyst are supported on SOC15 dGPUs only 2051 2025 * 2026 + * - temp[1-3]_emergency: temperature emergency max value(asic shutdown) in millidegrees Celsius 2027 + * - these are supported on SOC15 dGPUs only 2028 + * 2052 2029 * hwmon interfaces for GPU voltage: 2053 2030 * 2054 2031 * - in0_input: the voltage on the GPU in millivolts ··· 2101 2072 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0); 2102 2073 static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0); 2103 2074 static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1); 2075 + static SENSOR_DEVICE_ATTR(temp1_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_EDGE); 2104 2076 static SENSOR_DEVICE_ATTR(temp2_crit, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 0); 2105 2077 static 
SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, amdgpu_hwmon_show_hotspot_temp_thresh, NULL, 1); 2078 + static SENSOR_DEVICE_ATTR(temp2_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_JUNCTION); 2106 2079 static SENSOR_DEVICE_ATTR(temp3_crit, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 0); 2107 2080 static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, amdgpu_hwmon_show_mem_temp_thresh, NULL, 1); 2081 + static SENSOR_DEVICE_ATTR(temp3_emergency, S_IRUGO, amdgpu_hwmon_show_temp_emergency, NULL, PP_TEMP_MEM); 2108 2082 static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1, amdgpu_hwmon_set_pwm1, 0); 2109 2083 static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_enable, amdgpu_hwmon_set_pwm1_enable, 0); 2110 2084 static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); ··· 2138 2106 &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, 2139 2107 &sensor_dev_attr_temp3_crit.dev_attr.attr, 2140 2108 &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, 2109 + &sensor_dev_attr_temp1_emergency.dev_attr.attr, 2110 + &sensor_dev_attr_temp2_emergency.dev_attr.attr, 2111 + &sensor_dev_attr_temp3_emergency.dev_attr.attr, 2141 2112 &sensor_dev_attr_pwm1.dev_attr.attr, 2142 2113 &sensor_dev_attr_pwm1_enable.dev_attr.attr, 2143 2114 &sensor_dev_attr_pwm1_min.dev_attr.attr, ··· 2269 2234 (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || 2270 2235 attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || 2271 2236 attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || 2272 - attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr)) 2237 + attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr || 2238 + attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr || 2239 + attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr || 2240 + attr == &sensor_dev_attr_temp3_emergency.dev_attr.attr)) 2273 2241 return 0; 2274 2242 2275 2243 return effective_mode;
+7 -1
drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
··· 228 228 struct PP_TemperatureRange range = { 229 229 TEMP_RANGE_MIN, 230 230 TEMP_RANGE_MAX, 231 - TEMP_RANGE_MIN, 232 231 TEMP_RANGE_MAX, 233 232 TEMP_RANGE_MIN, 233 + TEMP_RANGE_MAX, 234 + TEMP_RANGE_MAX, 235 + TEMP_RANGE_MIN, 236 + TEMP_RANGE_MAX, 234 237 TEMP_RANGE_MAX}; 235 238 struct amdgpu_device *adev = hwmgr->adev; 236 239 ··· 248 245 249 246 adev->pm.dpm.thermal.min_temp = range.min; 250 247 adev->pm.dpm.thermal.max_temp = range.max; 248 + adev->pm.dpm.thermal.max_edge_emergency_temp = range.edge_emergency_max; 251 249 adev->pm.dpm.thermal.min_hotspot_temp = range.hotspot_min; 252 250 adev->pm.dpm.thermal.max_hotspot_crit_temp = range.hotspot_crit_max; 251 + adev->pm.dpm.thermal.max_hotspot_emergency_temp = range.hotspot_emergency_max; 253 252 adev->pm.dpm.thermal.min_mem_temp = range.mem_min; 254 253 adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max; 254 + adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max; 255 255 256 256 return ret; 257 257 }
+6
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
··· 4859 4859 4860 4860 thermal_data->max = pp_table->TedgeLimit * 4861 4861 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4862 + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * 4863 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4862 4864 thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * 4863 4865 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4866 + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * 4867 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4864 4868 thermal_data->mem_crit_max = pp_table->ThbmLimit * 4869 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4870 + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* 4865 4871 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 4866 4872 4867 4873 return 0;
+6
drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
··· 2534 2534 2535 2535 thermal_data->max = pp_table->TedgeLimit * 2536 2536 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 2537 + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * 2538 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 2537 2539 thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * 2538 2540 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 2541 + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * 2542 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 2539 2543 thermal_data->mem_crit_max = pp_table->ThbmLimit * 2544 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 2545 + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* 2540 2546 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 2541 2547 2542 2548 return 0;
+6
drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
··· 3982 3982 3983 3983 thermal_data->max = pp_table->TedgeLimit * 3984 3984 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 3985 + thermal_data->edge_emergency_max = (pp_table->TedgeLimit + CTF_OFFSET_EDGE) * 3986 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 3985 3987 thermal_data->hotspot_crit_max = pp_table->ThotspotLimit * 3986 3988 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 3989 + thermal_data->hotspot_emergency_max = (pp_table->ThotspotLimit + CTF_OFFSET_HOTSPOT) * 3990 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 3987 3991 thermal_data->mem_crit_max = pp_table->ThbmLimit * 3992 + PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 3993 + thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)* 3988 3994 PP_TEMPERATURE_UNITS_PER_CENTIGRADES; 3989 3995 3990 3996 return 0;
+3
drivers/gpu/drm/amd/powerplay/inc/power_state.h
··· 124 124 struct PP_TemperatureRange { 125 125 int min; 126 126 int max; 127 + int edge_emergency_max; 127 128 int hotspot_min; 128 129 int hotspot_crit_max; 130 + int hotspot_emergency_max; 129 131 int mem_min; 130 132 int mem_crit_max; 133 + int mem_emergency_max; 131 134 }; 132 135 133 136 struct PP_StateValidationBlock {
+8 -4
drivers/gpu/drm/amd/powerplay/inc/pp_thermal.h
··· 27 27 28 28 static const struct PP_TemperatureRange SMU7ThermalWithDelayPolicy[] = 29 29 { 30 - {-273150, 99000, -273150, 99000, -273150, 99000}, 31 - { 120000, 120000, 120000, 120000, 120000, 120000}, 30 + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, 31 + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, 32 32 }; 33 33 34 34 static const struct PP_TemperatureRange SMU7ThermalPolicy[] = 35 35 { 36 - {-273150, 99000, -273150, 99000, -273150, 99000}, 37 - { 120000, 120000, 120000, 120000, 120000, 120000}, 36 + {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, 37 + { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, 38 38 }; 39 + 40 + #define CTF_OFFSET_EDGE 5 41 + #define CTF_OFFSET_HOTSPOT 5 42 + #define CTF_OFFSET_HBM 5 39 43 40 44 #endif