Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/powerplay: a quick fix for the deadlock issue below

INFO: task ocltst:2028 blocked for more than 120 seconds.
Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
ocltst D 0 2028 2026 0x00000000
Call Trace:
__schedule+0x2c0/0x870
schedule+0x2c/0x70
schedule_preempt_disabled+0xe/0x10
__mutex_lock.isra.9+0x26d/0x4e0
__mutex_lock_slowpath+0x13/0x20
? __mutex_lock_slowpath+0x13/0x20
mutex_lock+0x2f/0x40
amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu]
gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu]
gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu]
amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu]
pp_dpm_force_performance_level+0xe7/0x100 [amdgpu]
amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu]

Fixes: a64c9e15e624 ("drm/amd/powerplay: cleanup the interfaces for powergate setting through SMU")
Signed-off-by: Evan Quan <evan.quan@amd.com>
Reported-by: Rui Teng <Rui.Teng@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Evan Quan and committed by
Alex Deucher
2ac0d686 0e5b7a95

+43 -15
+43 -15
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
··· 946 946 bool swsmu = is_support_sw_smu(adev); 947 947 948 948 switch (block_type) { 949 - case AMD_IP_BLOCK_TYPE_GFX: 950 949 case AMD_IP_BLOCK_TYPE_UVD: 951 - case AMD_IP_BLOCK_TYPE_VCN: 952 950 case AMD_IP_BLOCK_TYPE_VCE: 953 - case AMD_IP_BLOCK_TYPE_SDMA: 954 951 if (swsmu) { 955 952 ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); 956 - } else { 957 - if (adev->powerplay.pp_funcs && 958 - adev->powerplay.pp_funcs->set_powergating_by_smu) { 959 - mutex_lock(&adev->pm.mutex); 960 - ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( 961 - (adev)->powerplay.pp_handle, block_type, gate)); 962 - mutex_unlock(&adev->pm.mutex); 963 - } 953 + } else if (adev->powerplay.pp_funcs && 954 + adev->powerplay.pp_funcs->set_powergating_by_smu) { 955 + /* 956 + * TODO: need a better lock mechanism 957 + * 958 + * Here adev->pm.mutex lock protection is enforced on 959 + * UVD and VCE cases only. Since for other cases, there 960 + * may be already lock protection in amdgpu_pm.c. 961 + * This is a quick fix for the deadlock issue below. 962 + * NFO: task ocltst:2028 blocked for more than 120 seconds. 963 + * Tainted: G OE 5.0.0-37-generic #40~18.04.1-Ubuntu 964 + * echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 965 + * cltst D 0 2028 2026 0x00000000 966 + * all Trace: 967 + * __schedule+0x2c0/0x870 968 + * schedule+0x2c/0x70 969 + * schedule_preempt_disabled+0xe/0x10 970 + * __mutex_lock.isra.9+0x26d/0x4e0 971 + * __mutex_lock_slowpath+0x13/0x20 972 + * ? 
__mutex_lock_slowpath+0x13/0x20 973 + * mutex_lock+0x2f/0x40 974 + * amdgpu_dpm_set_powergating_by_smu+0x64/0xe0 [amdgpu] 975 + * gfx_v8_0_enable_gfx_static_mg_power_gating+0x3c/0x70 [amdgpu] 976 + * gfx_v8_0_set_powergating_state+0x66/0x260 [amdgpu] 977 + * amdgpu_device_ip_set_powergating_state+0x62/0xb0 [amdgpu] 978 + * pp_dpm_force_performance_level+0xe7/0x100 [amdgpu] 979 + * amdgpu_set_dpm_forced_performance_level+0x129/0x330 [amdgpu] 980 + */ 981 + mutex_lock(&adev->pm.mutex); 982 + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( 983 + (adev)->powerplay.pp_handle, block_type, gate)); 984 + mutex_unlock(&adev->pm.mutex); 964 985 } 986 + break; 987 + case AMD_IP_BLOCK_TYPE_GFX: 988 + case AMD_IP_BLOCK_TYPE_VCN: 989 + case AMD_IP_BLOCK_TYPE_SDMA: 990 + if (swsmu) 991 + ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate); 992 + else if (adev->powerplay.pp_funcs && 993 + adev->powerplay.pp_funcs->set_powergating_by_smu) 994 + ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( 995 + (adev)->powerplay.pp_handle, block_type, gate)); 965 996 break; 966 997 case AMD_IP_BLOCK_TYPE_JPEG: 967 998 if (swsmu) ··· 1001 970 case AMD_IP_BLOCK_TYPE_GMC: 1002 971 case AMD_IP_BLOCK_TYPE_ACP: 1003 972 if (adev->powerplay.pp_funcs && 1004 - adev->powerplay.pp_funcs->set_powergating_by_smu) { 1005 - mutex_lock(&adev->pm.mutex); 973 + adev->powerplay.pp_funcs->set_powergating_by_smu) 1006 974 ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu( 1007 975 (adev)->powerplay.pp_handle, block_type, gate)); 1008 - mutex_unlock(&adev->pm.mutex); 1009 - } 1010 976 break; 1011 977 default: 1012 978 break;