Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

amdgpu/pm/legacy: fix suspend/resume issues

resume and irq handler happily races in set_power_state()

* amdgpu_legacy_dpm_compute_clocks() needs lock
* protect irq work handler
* fix dpm_enabled usage

v2: fix clang build, integrate Lijo's comments (Alex)

Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2524
Fixes: 3712e7a49459 ("drm/amd/pm: unified lock protections in amdgpu_dpm.c")
Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
Tested-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name> # on Oland PRO
Signed-off-by: chr[] <chris@rudorff.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit ee3dc9e204d271c9c7a8d4d38a0bce4745d33e71)
Cc: stable@vger.kernel.org

authored by

chr[] and committed by
Alex Deucher
91dcc66b d082ecbc

+45 -14
+19 -6
drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c
··· 3042 3042 if (!amdgpu_dpm) 3043 3043 return 0; 3044 3044 3045 + mutex_lock(&adev->pm.mutex); 3045 3046 kv_dpm_setup_asic(adev); 3046 3047 ret = kv_dpm_enable(adev); 3047 3048 if (ret) ··· 3050 3049 else 3051 3050 adev->pm.dpm_enabled = true; 3052 3051 amdgpu_legacy_dpm_compute_clocks(adev); 3052 + mutex_unlock(&adev->pm.mutex); 3053 + 3053 3054 return ret; 3054 3055 } 3055 3056 ··· 3069 3066 { 3070 3067 struct amdgpu_device *adev = ip_block->adev; 3071 3068 3069 + cancel_work_sync(&adev->pm.dpm.thermal.work); 3070 + 3072 3071 if (adev->pm.dpm_enabled) { 3072 + mutex_lock(&adev->pm.mutex); 3073 + adev->pm.dpm_enabled = false; 3073 3074 /* disable dpm */ 3074 3075 kv_dpm_disable(adev); 3075 3076 /* reset the power state */ 3076 3077 adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; 3078 + mutex_unlock(&adev->pm.mutex); 3077 3079 } 3078 3080 return 0; 3079 3081 } 3080 3082 3081 3083 static int kv_dpm_resume(struct amdgpu_ip_block *ip_block) 3082 3084 { 3083 - int ret; 3085 + int ret = 0; 3084 3086 struct amdgpu_device *adev = ip_block->adev; 3085 3087 3086 - if (adev->pm.dpm_enabled) { 3088 + if (!amdgpu_dpm) 3089 + return 0; 3090 + 3091 + if (!adev->pm.dpm_enabled) { 3092 + mutex_lock(&adev->pm.mutex); 3087 3093 /* asic init will reset to the boot state */ 3088 3094 kv_dpm_setup_asic(adev); 3089 3095 ret = kv_dpm_enable(adev); 3090 - if (ret) 3096 + if (ret) { 3091 3097 adev->pm.dpm_enabled = false; 3092 - else 3098 + } else { 3093 3099 adev->pm.dpm_enabled = true; 3094 - if (adev->pm.dpm_enabled) 3095 3100 amdgpu_legacy_dpm_compute_clocks(adev); 3101 + } 3102 + mutex_unlock(&adev->pm.mutex); 3096 3103 } 3097 - return 0; 3104 + return ret; 3098 3105 } 3099 3106 3100 3107 static bool kv_dpm_is_idle(void *handle)
+6 -2
drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c
··· 1009 1009 enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; 1010 1010 int temp, size = sizeof(temp); 1011 1011 1012 - if (!adev->pm.dpm_enabled) 1013 - return; 1012 + mutex_lock(&adev->pm.mutex); 1014 1013 1014 + if (!adev->pm.dpm_enabled) { 1015 + mutex_unlock(&adev->pm.mutex); 1016 + return; 1017 + } 1015 1018 if (!pp_funcs->read_sensor(adev->powerplay.pp_handle, 1016 1019 AMDGPU_PP_SENSOR_GPU_TEMP, 1017 1020 (void *)&temp, ··· 1036 1033 adev->pm.dpm.state = dpm_state; 1037 1034 1038 1035 amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle); 1036 + mutex_unlock(&adev->pm.mutex); 1039 1037 }
+20 -6
drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c
··· 7786 7786 if (!amdgpu_dpm) 7787 7787 return 0; 7788 7788 7789 + mutex_lock(&adev->pm.mutex); 7789 7790 si_dpm_setup_asic(adev); 7790 7791 ret = si_dpm_enable(adev); 7791 7792 if (ret) ··· 7794 7793 else 7795 7794 adev->pm.dpm_enabled = true; 7796 7795 amdgpu_legacy_dpm_compute_clocks(adev); 7796 + mutex_unlock(&adev->pm.mutex); 7797 7797 return ret; 7798 7798 } 7799 7799 ··· 7812 7810 { 7813 7811 struct amdgpu_device *adev = ip_block->adev; 7814 7812 7813 + cancel_work_sync(&adev->pm.dpm.thermal.work); 7814 + 7815 7815 if (adev->pm.dpm_enabled) { 7816 + mutex_lock(&adev->pm.mutex); 7817 + adev->pm.dpm_enabled = false; 7816 7818 /* disable dpm */ 7817 7819 si_dpm_disable(adev); 7818 7820 /* reset the power state */ 7819 7821 adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; 7822 + mutex_unlock(&adev->pm.mutex); 7820 7823 } 7824 + 7821 7825 return 0; 7822 7826 } 7823 7827 7824 7828 static int si_dpm_resume(struct amdgpu_ip_block *ip_block) 7825 7829 { 7826 - int ret; 7830 + int ret = 0; 7827 7831 struct amdgpu_device *adev = ip_block->adev; 7828 7832 7829 - if (adev->pm.dpm_enabled) { 7833 + if (!amdgpu_dpm) 7834 + return 0; 7835 + 7836 + if (!adev->pm.dpm_enabled) { 7830 7837 /* asic init will reset to the boot state */ 7838 + mutex_lock(&adev->pm.mutex); 7831 7839 si_dpm_setup_asic(adev); 7832 7840 ret = si_dpm_enable(adev); 7833 - if (ret) 7841 + if (ret) { 7834 7842 adev->pm.dpm_enabled = false; 7835 - else 7843 + } else { 7836 7844 adev->pm.dpm_enabled = true; 7837 - if (adev->pm.dpm_enabled) 7838 7845 amdgpu_legacy_dpm_compute_clocks(adev); 7846 + } 7847 + mutex_unlock(&adev->pm.mutex); 7839 7848 } 7840 - return 0; 7849 + 7850 + return ret; 7841 7851 } 7842 7852 7843 7853 static bool si_dpm_is_idle(void *handle)