Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amd/display: add DMUB registers to crash dump diagnostic data.

[WHY]
Triaging DMCUB issues is easier when certain DMUB registers are available,
but these registers are not currently captured in crash dump diagnostic data.

[HOW]
Add dmub registers to diagnostic data collection.

Thanks to Nicholas Kazlauskas for the awesome input on this!

Signed-off-by: Ashley Thomas <Ashley.Thomas2@amd.com>
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Anson Jacob <Anson.Jacob@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Ashley Thomas and committed by
Alex Deucher
2631ac1a eb945257

+238 -9
+98 -2
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
··· 86 86 87 87 error: 88 88 DC_ERROR("Error queuing DMUB command: status=%d\n", status); 89 + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 89 90 } 90 91 91 92 void dc_dmub_srv_cmd_execute(struct dc_dmub_srv *dc_dmub_srv) ··· 96 95 enum dmub_status status; 97 96 98 97 status = dmub_srv_cmd_execute(dmub); 99 - if (status != DMUB_STATUS_OK) 98 + if (status != DMUB_STATUS_OK) { 100 99 DC_ERROR("Error starting DMUB execution: status=%d\n", status); 100 + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 101 + } 101 102 } 102 103 103 104 void dc_dmub_srv_wait_idle(struct dc_dmub_srv *dc_dmub_srv) ··· 109 106 enum dmub_status status; 110 107 111 108 status = dmub_srv_wait_for_idle(dmub, 100000); 112 - if (status != DMUB_STATUS_OK) 109 + if (status != DMUB_STATUS_OK) { 113 110 DC_ERROR("Error waiting for DMUB idle: status=%d\n", status); 111 + dc_dmub_srv_log_diagnostic_data(dc_dmub_srv); 112 + } 114 113 } 115 114 116 115 void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, ··· 218 213 void dc_dmub_trace_event_control(struct dc *dc, bool enable) 219 214 { 220 215 dm_helpers_dmub_outbox_interrupt_control(dc->ctx, enable); 216 + } 217 + 218 + bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *diag_data) 219 + { 220 + if (!dc_dmub_srv || !dc_dmub_srv->dmub || !diag_data) 221 + return false; 222 + return dmub_srv_get_diagnostic_data(dc_dmub_srv->dmub, diag_data); 223 + } 224 + 225 + void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv) 226 + { 227 + struct dmub_diagnostic_data diag_data = {0}; 228 + 229 + if (!dc_dmub_srv || !dc_dmub_srv->dmub) { 230 + DC_LOG_ERROR("%s: invalid parameters.", __func__); 231 + return; 232 + } 233 + 234 + if (!dc_dmub_srv_get_diagnostic_data(dc_dmub_srv, &diag_data)) { 235 + DC_LOG_ERROR("%s: dc_dmub_srv_get_diagnostic_data failed.", __func__); 236 + return; 237 + } 238 + 239 + DC_LOG_DEBUG( 240 + "DMCUB STATE\n" 241 + " dmcub_version : %08x\n" 242 + " scratch [0] : %08x\n" 
243 + " scratch [1] : %08x\n" 244 + " scratch [2] : %08x\n" 245 + " scratch [3] : %08x\n" 246 + " scratch [4] : %08x\n" 247 + " scratch [5] : %08x\n" 248 + " scratch [6] : %08x\n" 249 + " scratch [7] : %08x\n" 250 + " scratch [8] : %08x\n" 251 + " scratch [9] : %08x\n" 252 + " scratch [10] : %08x\n" 253 + " scratch [11] : %08x\n" 254 + " scratch [12] : %08x\n" 255 + " scratch [13] : %08x\n" 256 + " scratch [14] : %08x\n" 257 + " scratch [15] : %08x\n" 258 + " pc : %08x\n" 259 + " unk_fault_addr : %08x\n" 260 + " inst_fault_addr : %08x\n" 261 + " data_fault_addr : %08x\n" 262 + " inbox1_rptr : %08x\n" 263 + " inbox1_wptr : %08x\n" 264 + " inbox1_size : %08x\n" 265 + " inbox0_rptr : %08x\n" 266 + " inbox0_wptr : %08x\n" 267 + " inbox0_size : %08x\n" 268 + " is_enabled : %d\n" 269 + " is_soft_reset : %d\n" 270 + " is_secure_reset : %d\n" 271 + " is_traceport_en : %d\n" 272 + " is_cw0_en : %d\n" 273 + " is_cw6_en : %d\n", 274 + diag_data.dmcub_version, 275 + diag_data.scratch[0], 276 + diag_data.scratch[1], 277 + diag_data.scratch[2], 278 + diag_data.scratch[3], 279 + diag_data.scratch[4], 280 + diag_data.scratch[5], 281 + diag_data.scratch[6], 282 + diag_data.scratch[7], 283 + diag_data.scratch[8], 284 + diag_data.scratch[9], 285 + diag_data.scratch[10], 286 + diag_data.scratch[11], 287 + diag_data.scratch[12], 288 + diag_data.scratch[13], 289 + diag_data.scratch[14], 290 + diag_data.scratch[15], 291 + diag_data.pc, 292 + diag_data.undefined_address_fault_addr, 293 + diag_data.inst_fetch_fault_addr, 294 + diag_data.data_write_fault_addr, 295 + diag_data.inbox1_rptr, 296 + diag_data.inbox1_wptr, 297 + diag_data.inbox1_size, 298 + diag_data.inbox0_rptr, 299 + diag_data.inbox0_wptr, 300 + diag_data.inbox0_size, 301 + diag_data.is_dmcub_enabled, 302 + diag_data.is_dmcub_soft_reset, 303 + diag_data.is_dmcub_secure_reset, 304 + diag_data.is_traceport_en, 305 + diag_data.is_cw0_enabled, 306 + diag_data.is_cw6_enabled); 221 307 }
+4
drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h
··· 71 71 72 72 void dc_dmub_srv_send_inbox0_cmd(struct dc_dmub_srv *dmub_srv, union dmub_inbox0_data_register data); 73 73 74 + bool dc_dmub_srv_get_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv, struct dmub_diagnostic_data *dmub_oca); 75 + 76 + void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv); 77 + 74 78 #endif /* _DMUB_DC_SRV_H_ */
+29
drivers/gpu/drm/amd/display/dmub/dmub_srv.h
··· 244 244 }; 245 245 246 246 /** 247 + * struct dmub_diagnostic_data - Diagnostic data retrieved from DMCUB for 248 + * debugging purposes, including logging, crash analysis, etc. 249 + */ 250 + struct dmub_diagnostic_data { 251 + uint32_t dmcub_version; 252 + uint32_t scratch[16]; 253 + uint32_t pc; 254 + uint32_t undefined_address_fault_addr; 255 + uint32_t inst_fetch_fault_addr; 256 + uint32_t data_write_fault_addr; 257 + uint32_t inbox1_rptr; 258 + uint32_t inbox1_wptr; 259 + uint32_t inbox1_size; 260 + uint32_t inbox0_rptr; 261 + uint32_t inbox0_wptr; 262 + uint32_t inbox0_size; 263 + uint8_t is_dmcub_enabled : 1; 264 + uint8_t is_dmcub_soft_reset : 1; 265 + uint8_t is_dmcub_secure_reset : 1; 266 + uint8_t is_traceport_en : 1; 267 + uint8_t is_cw0_enabled : 1; 268 + uint8_t is_cw6_enabled : 1; 269 + }; 270 + 271 + /** 247 272 * struct dmub_srv_base_funcs - Driver specific base callbacks 248 273 */ 249 274 struct dmub_srv_base_funcs { ··· 360 335 361 336 void (*send_inbox0_cmd)(struct dmub_srv *dmub, union dmub_inbox0_data_register data); 362 337 uint32_t (*get_current_time)(struct dmub_srv *dmub); 338 + 339 + void (*get_diagnostic_data)(struct dmub_srv *dmub, struct dmub_diagnostic_data *dmub_oca); 363 340 }; 364 341 365 342 /** ··· 711 684 union dmub_rb_cmd *cmd); 712 685 713 686 bool dmub_srv_get_outbox0_msg(struct dmub_srv *dmub, struct dmcub_trace_buf_entry *entry); 687 + 688 + bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data); 714 689 715 690 #if defined(__cplusplus) 716 691 }
+64 -1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.c
··· 40 40 41 41 const struct dmub_srv_common_regs dmub_srv_dcn20_regs = { 42 42 #define DMUB_SR(reg) REG_OFFSET(reg), 43 - { DMUB_COMMON_REGS() }, 43 + { 44 + DMUB_COMMON_REGS() 45 + DMCUB_INTERNAL_REGS() 46 + }, 44 47 #undef DMUB_SR 45 48 46 49 #define DMUB_SF(reg, field) FD_MASK(reg, field), ··· 406 403 uint32_t dmub_dcn20_get_current_time(struct dmub_srv *dmub) 407 404 { 408 405 return REG_READ(DMCUB_TIMER_CURRENT); 406 + } 407 + 408 + void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) 409 + { 410 + uint32_t is_dmub_enabled, is_soft_reset, is_sec_reset; 411 + uint32_t is_traceport_enabled, is_cw0_enabled, is_cw6_enabled; 412 + 413 + if (!dmub || !diag_data) 414 + return; 415 + 416 + memset(diag_data, 0, sizeof(*diag_data)); 417 + 418 + diag_data->dmcub_version = dmub->fw_version; 419 + 420 + diag_data->scratch[0] = REG_READ(DMCUB_SCRATCH0); 421 + diag_data->scratch[1] = REG_READ(DMCUB_SCRATCH1); 422 + diag_data->scratch[2] = REG_READ(DMCUB_SCRATCH2); 423 + diag_data->scratch[3] = REG_READ(DMCUB_SCRATCH3); 424 + diag_data->scratch[4] = REG_READ(DMCUB_SCRATCH4); 425 + diag_data->scratch[5] = REG_READ(DMCUB_SCRATCH5); 426 + diag_data->scratch[6] = REG_READ(DMCUB_SCRATCH6); 427 + diag_data->scratch[7] = REG_READ(DMCUB_SCRATCH7); 428 + diag_data->scratch[8] = REG_READ(DMCUB_SCRATCH8); 429 + diag_data->scratch[9] = REG_READ(DMCUB_SCRATCH9); 430 + diag_data->scratch[10] = REG_READ(DMCUB_SCRATCH10); 431 + diag_data->scratch[11] = REG_READ(DMCUB_SCRATCH11); 432 + diag_data->scratch[12] = REG_READ(DMCUB_SCRATCH12); 433 + diag_data->scratch[13] = REG_READ(DMCUB_SCRATCH13); 434 + diag_data->scratch[14] = REG_READ(DMCUB_SCRATCH14); 435 + diag_data->scratch[15] = REG_READ(DMCUB_SCRATCH15); 436 + 437 + diag_data->undefined_address_fault_addr = REG_READ(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR); 438 + diag_data->inst_fetch_fault_addr = REG_READ(DMCUB_INST_FETCH_FAULT_ADDR); 439 + diag_data->data_write_fault_addr = 
REG_READ(DMCUB_DATA_WRITE_FAULT_ADDR); 440 + 441 + diag_data->inbox1_rptr = REG_READ(DMCUB_INBOX1_RPTR); 442 + diag_data->inbox1_wptr = REG_READ(DMCUB_INBOX1_WPTR); 443 + diag_data->inbox1_size = REG_READ(DMCUB_INBOX1_SIZE); 444 + 445 + diag_data->inbox0_rptr = REG_READ(DMCUB_INBOX0_RPTR); 446 + diag_data->inbox0_wptr = REG_READ(DMCUB_INBOX0_WPTR); 447 + diag_data->inbox0_size = REG_READ(DMCUB_INBOX0_SIZE); 448 + 449 + REG_GET(DMCUB_CNTL, DMCUB_ENABLE, &is_dmub_enabled); 450 + diag_data->is_dmcub_enabled = is_dmub_enabled; 451 + 452 + REG_GET(DMCUB_CNTL, DMCUB_SOFT_RESET, &is_soft_reset); 453 + diag_data->is_dmcub_soft_reset = is_soft_reset; 454 + 455 + REG_GET(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS, &is_sec_reset); 456 + diag_data->is_dmcub_secure_reset = is_sec_reset; 457 + 458 + REG_GET(DMCUB_CNTL, DMCUB_TRACEPORT_EN, &is_traceport_enabled); 459 + diag_data->is_traceport_en = is_traceport_enabled; 460 + 461 + REG_GET(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE, &is_cw0_enabled); 462 + diag_data->is_cw0_enabled = is_cw0_enabled; 463 + 464 + REG_GET(DMCUB_REGION3_CW6_TOP_ADDRESS, DMCUB_REGION3_CW6_ENABLE, &is_cw6_enabled); 465 + diag_data->is_cw6_enabled = is_cw6_enabled; 409 466 }
+13 -1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn20.h
··· 36 36 DMUB_SR(DMCUB_CNTL) \ 37 37 DMUB_SR(DMCUB_MEM_CNTL) \ 38 38 DMUB_SR(DMCUB_SEC_CNTL) \ 39 + DMUB_SR(DMCUB_INBOX0_SIZE) \ 40 + DMUB_SR(DMCUB_INBOX0_RPTR) \ 41 + DMUB_SR(DMCUB_INBOX0_WPTR) \ 39 42 DMUB_SR(DMCUB_INBOX1_BASE_ADDRESS) \ 40 43 DMUB_SR(DMCUB_INBOX1_SIZE) \ 41 44 DMUB_SR(DMCUB_INBOX1_RPTR) \ ··· 111 108 DMUB_SR(DCN_VM_FB_LOCATION_BASE) \ 112 109 DMUB_SR(DCN_VM_FB_OFFSET) \ 113 110 DMUB_SR(DMCUB_INTERRUPT_ACK) \ 114 - DMUB_SR(DMCUB_TIMER_CURRENT) 111 + DMUB_SR(DMCUB_TIMER_CURRENT) \ 112 + DMUB_SR(DMCUB_INST_FETCH_FAULT_ADDR) \ 113 + DMUB_SR(DMCUB_UNDEFINED_ADDRESS_FAULT_ADDR) \ 114 + DMUB_SR(DMCUB_DATA_WRITE_FAULT_ADDR) 115 + 116 + #define DMCUB_INTERNAL_REGS() 115 117 116 118 #define DMUB_COMMON_FIELDS() \ 117 119 DMUB_SF(DMCUB_CNTL, DMCUB_ENABLE) \ ··· 126 118 DMUB_SF(DMCUB_MEM_CNTL, DMCUB_MEM_WRITE_SPACE) \ 127 119 DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET) \ 128 120 DMUB_SF(DMCUB_SEC_CNTL, DMCUB_MEM_UNIT_ID) \ 121 + DMUB_SF(DMCUB_SEC_CNTL, DMCUB_SEC_RESET_STATUS) \ 129 122 DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_TOP_ADDRESS) \ 130 123 DMUB_SF(DMCUB_REGION3_CW0_TOP_ADDRESS, DMCUB_REGION3_CW0_ENABLE) \ 131 124 DMUB_SF(DMCUB_REGION3_CW1_TOP_ADDRESS, DMCUB_REGION3_CW1_TOP_ADDRESS) \ ··· 156 147 struct dmub_srv_common_reg_offset { 157 148 #define DMUB_SR(reg) uint32_t reg; 158 149 DMUB_COMMON_REGS() 150 + DMCUB_INTERNAL_REGS() 159 151 #undef DMUB_SR 160 152 }; 161 153 ··· 243 233 bool dmub_dcn20_use_cached_trace_buffer(struct dmub_srv *dmub); 244 234 245 235 uint32_t dmub_dcn20_get_current_time(struct dmub_srv *dmub); 236 + 237 + void dmub_dcn20_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *dmub_oca); 246 238 247 239 #endif /* _DMUB_DCN20_H_ */
+4 -1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn21.c
··· 39 39 40 40 const struct dmub_srv_common_regs dmub_srv_dcn21_regs = { 41 41 #define DMUB_SR(reg) REG_OFFSET(reg), 42 - { DMUB_COMMON_REGS() }, 42 + { 43 + DMUB_COMMON_REGS() 44 + DMCUB_INTERNAL_REGS() 45 + }, 43 46 #undef DMUB_SR 44 47 45 48 #define DMUB_SF(reg, field) FD_MASK(reg, field),
+4 -1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn30.c
··· 40 40 41 41 const struct dmub_srv_common_regs dmub_srv_dcn30_regs = { 42 42 #define DMUB_SR(reg) REG_OFFSET(reg), 43 - { DMUB_COMMON_REGS() }, 43 + { 44 + DMUB_COMMON_REGS() 45 + DMCUB_INTERNAL_REGS() 46 + }, 44 47 #undef DMUB_SR 45 48 46 49 #define DMUB_SF(reg, field) FD_MASK(reg, field),
+4 -1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn301.c
··· 39 39 40 40 const struct dmub_srv_common_regs dmub_srv_dcn301_regs = { 41 41 #define DMUB_SR(reg) REG_OFFSET(reg), 42 - { DMUB_COMMON_REGS() }, 42 + { 43 + DMUB_COMMON_REGS() 44 + DMCUB_INTERNAL_REGS() 45 + }, 43 46 #undef DMUB_SR 44 47 45 48 #define DMUB_SF(reg, field) FD_MASK(reg, field),
+4 -1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn302.c
··· 39 39 40 40 const struct dmub_srv_common_regs dmub_srv_dcn302_regs = { 41 41 #define DMUB_SR(reg) REG_OFFSET(reg), 42 - { DMUB_COMMON_REGS() }, 42 + { 43 + DMUB_COMMON_REGS() 44 + DMCUB_INTERNAL_REGS() 45 + }, 43 46 #undef DMUB_SR 44 47 45 48 #define DMUB_SF(reg, field) FD_MASK(reg, field),
+4 -1
drivers/gpu/drm/amd/display/dmub/src/dmub_dcn303.c
··· 21 21 22 22 const struct dmub_srv_common_regs dmub_srv_dcn303_regs = { 23 23 #define DMUB_SR(reg) REG_OFFSET(reg), 24 - { DMUB_COMMON_REGS() }, 24 + { 25 + DMUB_COMMON_REGS() 26 + DMCUB_INTERNAL_REGS() 27 + }, 25 28 #undef DMUB_SR 26 29 27 30 #define DMUB_SF(reg, field) FD_MASK(reg, field),
+10
drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c
··· 176 176 funcs->get_outbox0_wptr = dmub_dcn20_get_outbox0_wptr; 177 177 funcs->set_outbox0_rptr = dmub_dcn20_set_outbox0_rptr; 178 178 179 + funcs->get_diagnostic_data = dmub_dcn20_get_diagnostic_data; 180 + 179 181 if (asic == DMUB_ASIC_DCN21) { 180 182 dmub->regs = &dmub_srv_dcn21_regs; 181 183 ··· 795 793 dmub->outbox0_rb.wrpt = dmub->hw_funcs.get_outbox0_wptr(dmub); 796 794 797 795 return dmub_rb_out_trace_buffer_front(&dmub->outbox0_rb, (void *)entry); 796 + } 797 + 798 + bool dmub_srv_get_diagnostic_data(struct dmub_srv *dmub, struct dmub_diagnostic_data *diag_data) 799 + { 800 + if (!dmub || !dmub->hw_funcs.get_diagnostic_data || !diag_data) 801 + return false; 802 + dmub->hw_funcs.get_diagnostic_data(dmub, diag_data); 803 + return true; 798 804 }