Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: Fix race condition in DPIA AUX transfer

[Why]
This fix was intended for improving on coding style but in the process
uncovers a race condition, which explains why we are getting incorrect
length in DPIA AUX replies. Due to the call path of DPIA AUX going from
DC back to DM layer then again into DC and the added complexities on top
of current DC AUX implementation, a proper fix to rely on current dc_lock
to address the race condition is difficult without a major overhaul
on how DPIA AUX is implemented.

[How]
- Add a mutex dpia_aux_lock to protect DPIA AUX transfers
- Remove DMUB_ASYNC_TO_SYNC_ACCESS_* codes and rely solely on
aux_return_code_type for error reporting and handling
- Separate SET_CONFIG from DPIA AUX transfer because they have quite
different processing logic
- Remove unnecessary type casting to and from void * type

Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Jasdeep Dhillon <jdhillon@amd.com>
Signed-off-by: Stylon Wang <stylon.wang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Stylon Wang and committed by
Alex Deucher
ead08b95 719b59a3

+91 -87
+73 -78
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
··· 146 146 /* Number of bytes in PSP footer for firmware. */ 147 147 #define PSP_FOOTER_BYTES 0x100 148 148 149 - /* 150 - * DMUB Async to Sync Mechanism Status 151 - */ 152 - #define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1 153 - #define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2 154 - #define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3 155 - #define DMUB_ASYNC_TO_SYNC_ACCESS_INVALID 4 156 - 157 149 /** 158 150 * DOC: overview 159 151 * ··· 1433 1441 memset(&init_params, 0, sizeof(init_params)); 1434 1442 #endif 1435 1443 1444 + mutex_init(&adev->dm.dpia_aux_lock); 1436 1445 mutex_init(&adev->dm.dc_lock); 1437 1446 mutex_init(&adev->dm.audio_lock); 1438 1447 ··· 1798 1805 1799 1806 mutex_destroy(&adev->dm.audio_lock); 1800 1807 mutex_destroy(&adev->dm.dc_lock); 1808 + mutex_destroy(&adev->dm.dpia_aux_lock); 1801 1809 1802 1810 return; 1803 1811 } ··· 10198 10204 return value; 10199 10205 } 10200 10206 10201 - static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux, 10202 - struct dc_context *ctx, 10203 - uint8_t status_type, 10204 - uint32_t *operation_result) 10207 + int amdgpu_dm_process_dmub_aux_transfer_sync( 10208 + struct dc_context *ctx, 10209 + unsigned int link_index, 10210 + struct aux_payload *payload, 10211 + enum aux_return_code_type *operation_result) 10205 10212 { 10206 10213 struct amdgpu_device *adev = ctx->driver_context; 10207 - int return_status = -1; 10208 10214 struct dmub_notification *p_notify = adev->dm.dmub_notify; 10215 + int ret = -1; 10209 10216 10210 - if (is_cmd_aux) { 10211 - if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { 10212 - return_status = p_notify->aux_reply.length; 10213 - *operation_result = p_notify->result; 10214 - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) { 10215 - *operation_result = AUX_RET_ERROR_TIMEOUT; 10216 - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) { 10217 - *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; 10218 - } else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_INVALID) { 10219 
- *operation_result = AUX_RET_ERROR_INVALID_REPLY; 10220 - } else { 10221 - *operation_result = AUX_RET_ERROR_UNKNOWN; 10222 - } 10223 - } else { 10224 - if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) { 10225 - return_status = 0; 10226 - *operation_result = p_notify->sc_status; 10227 - } else { 10228 - *operation_result = SET_CONFIG_UNKNOWN_ERROR; 10229 - } 10217 + mutex_lock(&adev->dm.dpia_aux_lock); 10218 + if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) { 10219 + *operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE; 10220 + goto out; 10221 + } 10222 + 10223 + if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { 10224 + DRM_ERROR("wait_for_completion_timeout timeout!"); 10225 + *operation_result = AUX_RET_ERROR_TIMEOUT; 10226 + goto out; 10230 10227 } 10231 10228 10232 - return return_status; 10229 + if (p_notify->result != AUX_RET_SUCCESS) { 10230 + /* 10231 + * Transient states before tunneling is enabled could 10232 + * lead to this error. We can ignore this for now. 
10233 + */ 10234 + if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) { 10235 + DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n", 10236 + payload->address, payload->length, 10237 + p_notify->result); 10238 + } 10239 + *operation_result = AUX_RET_ERROR_INVALID_REPLY; 10240 + goto out; 10241 + } 10242 + 10243 + 10244 + payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; 10245 + if (!payload->write && p_notify->aux_reply.length && 10246 + (payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) { 10247 + 10248 + if (payload->length != p_notify->aux_reply.length) { 10249 + DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n", 10250 + p_notify->aux_reply.length, 10251 + payload->address, payload->length); 10252 + *operation_result = AUX_RET_ERROR_INVALID_REPLY; 10253 + goto out; 10254 + } 10255 + 10256 + memcpy(payload->data, p_notify->aux_reply.data, 10257 + p_notify->aux_reply.length); 10258 + } 10259 + 10260 + /* success */ 10261 + ret = p_notify->aux_reply.length; 10262 + *operation_result = p_notify->result; 10263 + out: 10264 + mutex_unlock(&adev->dm.dpia_aux_lock); 10265 + return ret; 10233 10266 } 10234 10267 10235 - int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx, 10236 - unsigned int link_index, void *cmd_payload, void *operation_result) 10268 + int amdgpu_dm_process_dmub_set_config_sync( 10269 + struct dc_context *ctx, 10270 + unsigned int link_index, 10271 + struct set_config_cmd_payload *payload, 10272 + enum set_config_status *operation_result) 10237 10273 { 10238 10274 struct amdgpu_device *adev = ctx->driver_context; 10239 - int ret = 0; 10275 + bool is_cmd_complete; 10276 + int ret; 10240 10277 10241 - if (is_cmd_aux) { 10242 - dc_process_dmub_aux_transfer_async(ctx->dc, 10243 - link_index, (struct aux_payload *)cmd_payload); 10244 - } else if (dc_process_dmub_set_config_async(ctx->dc, link_index, 10245 - (struct set_config_cmd_payload *)cmd_payload, 10246 - adev->dm.dmub_notify)) { 10247 - return 
amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, 10248 - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, 10249 - (uint32_t *)operation_result); 10250 - } 10278 + mutex_lock(&adev->dm.dpia_aux_lock); 10279 + is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc, 10280 + link_index, payload, adev->dm.dmub_notify); 10251 10281 10252 - ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ); 10253 - if (ret == 0) { 10282 + if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) { 10283 + ret = 0; 10284 + *operation_result = adev->dm.dmub_notify->sc_status; 10285 + } else { 10254 10286 DRM_ERROR("wait_for_completion_timeout timeout!"); 10255 - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, 10256 - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT, 10257 - (uint32_t *)operation_result); 10287 + ret = -1; 10288 + *operation_result = SET_CONFIG_UNKNOWN_ERROR; 10258 10289 } 10259 10290 10260 - if (is_cmd_aux) { 10261 - if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) { 10262 - struct aux_payload *payload = (struct aux_payload *)cmd_payload; 10263 - 10264 - payload->reply[0] = adev->dm.dmub_notify->aux_reply.command; 10265 - if (!payload->write && adev->dm.dmub_notify->aux_reply.length && 10266 - payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) { 10267 - 10268 - if (payload->length != adev->dm.dmub_notify->aux_reply.length) { 10269 - DRM_WARN("invalid read from DPIA AUX %x(%d) got length %d!\n", 10270 - payload->address, payload->length, 10271 - adev->dm.dmub_notify->aux_reply.length); 10272 - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, ctx, 10273 - DMUB_ASYNC_TO_SYNC_ACCESS_INVALID, 10274 - (uint32_t *)operation_result); 10275 - } 10276 - 10277 - memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data, 10278 - adev->dm.dmub_notify->aux_reply.length); 10279 - } 10280 - } 10281 - } 10282 - 10283 - return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux, 10284 - ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS, 
10285 - (uint32_t *)operation_result); 10291 + mutex_unlock(&adev->dm.dpia_aux_lock); 10292 + return ret; 10286 10293 } 10287 10294 10288 10295 /*
+14 -3
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
··· 59 59 #include "signal_types.h" 60 60 #include "amdgpu_dm_crc.h" 61 61 struct aux_payload; 62 + struct set_config_cmd_payload; 62 63 enum aux_return_code_type; 64 + enum set_config_status; 63 65 64 66 /* Forward declarations */ 65 67 struct amdgpu_device; ··· 544 542 * occurred on certain intel platform 545 543 */ 546 544 bool aux_hpd_discon_quirk; 545 + 546 + /** 547 + * @dpia_aux_lock: 548 + * 549 + * Guards access to DPIA AUX 550 + */ 551 + struct mutex dpia_aux_lock; 547 552 }; 548 553 549 554 enum dsc_clock_force_state { ··· 794 785 795 786 extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs; 796 787 797 - int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, 798 - struct dc_context *ctx, unsigned int link_index, 799 - void *payload, void *operation_result); 788 + int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index, 789 + struct aux_payload *payload, enum aux_return_code_type *operation_result); 790 + 791 + int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index, 792 + struct set_config_cmd_payload *payload, enum set_config_status *operation_result); 800 793 801 794 bool check_seamless_boot_capability(struct amdgpu_device *adev); 802 795
+4 -6
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
··· 817 817 struct aux_payload *payload, 818 818 enum aux_return_code_type *operation_result) 819 819 { 820 - return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx, 821 - link->link_index, (void *)payload, 822 - (void *)operation_result); 820 + return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload, 821 + operation_result); 823 822 } 824 823 825 824 int dm_helpers_dmub_set_config_sync(struct dc_context *ctx, ··· 826 827 struct set_config_cmd_payload *payload, 827 828 enum set_config_status *operation_result) 828 829 { 829 - return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx, 830 - link->link_index, (void *)payload, 831 - (void *)operation_result); 830 + return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload, 831 + operation_result); 832 832 } 833 833 834 834 void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)