Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: ufs: Add history of fatal events

Currently only "interrupt-based" errors have their own history. However,
there are some "non-interrupt-based" errors or events which also need history
to improve debugging or to help assess the health status of UFS devices.

Examples of fatal errors:

- Link startup error

- Suspend error

- Resume error

Examples of abnormal events:

- Task or request abort

- Device reset (currently equivalent to Logical Unit Reset)

- Host reset

This patch tracks the above errors and events using the existing UFS error
history mechanism.

Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Stanley Chu and committed by
Martin K. Petersen
8808b4e9 c5397f13

+50 -14
+34 -12
drivers/scsi/ufs/ufshcd.c
··· 429 429 ufshcd_print_err_hist(hba, &hba->ufs_stats.nl_err, "nl_err"); 430 430 ufshcd_print_err_hist(hba, &hba->ufs_stats.tl_err, "tl_err"); 431 431 ufshcd_print_err_hist(hba, &hba->ufs_stats.dme_err, "dme_err"); 432 - ufshcd_print_err_hist(hba, &hba->ufs_stats.fatal_err, "fatal_err"); 433 432 ufshcd_print_err_hist(hba, &hba->ufs_stats.auto_hibern8_err, 434 433 "auto_hibern8_err"); 434 + ufshcd_print_err_hist(hba, &hba->ufs_stats.fatal_err, "fatal_err"); 435 + ufshcd_print_err_hist(hba, &hba->ufs_stats.link_startup_err, 436 + "link_startup_fail"); 437 + ufshcd_print_err_hist(hba, &hba->ufs_stats.resume_err, "resume_fail"); 438 + ufshcd_print_err_hist(hba, &hba->ufs_stats.suspend_err, 439 + "suspend_fail"); 440 + ufshcd_print_err_hist(hba, &hba->ufs_stats.dev_reset, "dev_reset"); 441 + ufshcd_print_err_hist(hba, &hba->ufs_stats.host_reset, "host_reset"); 442 + ufshcd_print_err_hist(hba, &hba->ufs_stats.task_abort, "task_abort"); 435 443 436 444 ufshcd_print_clk_freqs(hba); 437 445 ··· 4338 4330 return ufshcd_disable_tx_lcc(hba, true); 4339 4331 } 4340 4332 4333 + static void ufshcd_update_reg_hist(struct ufs_err_reg_hist *reg_hist, 4334 + u32 reg) 4335 + { 4336 + reg_hist->reg[reg_hist->pos] = reg; 4337 + reg_hist->tstamp[reg_hist->pos] = ktime_get(); 4338 + reg_hist->pos = (reg_hist->pos + 1) % UFS_ERR_REG_HIST_LENGTH; 4339 + } 4340 + 4341 4341 /** 4342 4342 * ufshcd_link_startup - Initialize unipro link startup 4343 4343 * @hba: per adapter instance ··· 4373 4357 4374 4358 /* check if device is detected by inter-connect layer */ 4375 4359 if (!ret && !ufshcd_is_device_present(hba)) { 4360 + ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err, 4361 + 0); 4376 4362 dev_err(hba->dev, "%s: Device not present\n", __func__); 4377 4363 ret = -ENXIO; 4378 4364 goto out; ··· 4385 4367 * but we can't be sure if the link is up until link startup 4386 4368 * succeeds. So reset the local Uni-Pro and try again. 
4387 4369 */ 4388 - if (ret && ufshcd_hba_enable(hba)) 4370 + if (ret && ufshcd_hba_enable(hba)) { 4371 + ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err, 4372 + (u32)ret); 4389 4373 goto out; 4374 + } 4390 4375 } while (ret && retries--); 4391 4376 4392 - if (ret) 4377 + if (ret) { 4393 4378 /* failed to get the link up... retire */ 4379 + ufshcd_update_reg_hist(&hba->ufs_stats.link_startup_err, 4380 + (u32)ret); 4394 4381 goto out; 4382 + } 4395 4383 4396 4384 if (link_startup_again) { 4397 4385 link_startup_again = false; ··· 5373 5349 pm_runtime_put_sync(hba->dev); 5374 5350 } 5375 5351 5376 - static void ufshcd_update_reg_hist(struct ufs_err_reg_hist *reg_hist, 5377 - u32 reg) 5378 - { 5379 - reg_hist->reg[reg_hist->pos] = reg; 5380 - reg_hist->tstamp[reg_hist->pos] = ktime_get(); 5381 - reg_hist->pos = (reg_hist->pos + 1) % UFS_ERR_REG_HIST_LENGTH; 5382 - } 5383 - 5384 5352 /** 5385 5353 * ufshcd_update_uic_error - check and set fatal UIC error flags. 5386 5354 * @hba: per-adapter instance ··· 5965 5949 5966 5950 out: 5967 5951 hba->req_abort_count = 0; 5952 + ufshcd_update_reg_hist(&hba->ufs_stats.dev_reset, (u32)err); 5968 5953 if (!err) { 5969 5954 err = SUCCESS; 5970 5955 } else { ··· 6059 6042 */ 6060 6043 scsi_print_command(hba->lrb[tag].cmd); 6061 6044 if (!hba->req_abort_count) { 6045 + ufshcd_update_reg_hist(&hba->ufs_stats.task_abort, 0); 6062 6046 ufshcd_print_host_regs(hba); 6063 6047 ufshcd_print_host_state(hba); 6064 6048 ufshcd_print_pwr_info(hba); ··· 6195 6177 out: 6196 6178 if (err) 6197 6179 dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err); 6198 - 6180 + ufshcd_update_reg_hist(&hba->ufs_stats.host_reset, (u32)err); 6199 6181 return err; 6200 6182 } 6201 6183 ··· 7837 7819 ufshcd_release(hba); 7838 7820 out: 7839 7821 hba->pm_op_in_progress = 0; 7822 + if (ret) 7823 + ufshcd_update_reg_hist(&hba->ufs_stats.suspend_err, (u32)ret); 7840 7824 return ret; 7841 7825 } 7842 7826 ··· 7941 7921 ufshcd_setup_clocks(hba, false); 
7942 7922 out: 7943 7923 hba->pm_op_in_progress = 0; 7924 + if (ret) 7925 + ufshcd_update_reg_hist(&hba->ufs_stats.resume_err, (u32)ret); 7944 7926 return ret; 7945 7927 } 7946 7928
+16 -2
drivers/scsi/ufs/ufshcd.h
··· 436 436 * @nl_err: tracks nl-uic errors 437 437 * @tl_err: tracks tl-uic errors 438 438 * @dme_err: tracks dme errors 439 - * @fatal_err: tracks fatal errors 440 439 * @auto_hibern8_err: tracks auto-hibernate errors 440 + * @fatal_err: tracks fatal errors 441 + * @link_startup_err: tracks link-startup errors 442 + * @resume_err: tracks resume errors 443 + * @suspend_err: tracks suspend errors 444 + * @dev_reset: tracks device reset events 445 + * @host_reset: tracks host reset events 446 + * @task_abort: tracks task abort events 441 447 */ 442 448 struct ufs_stats { 443 449 u32 hibern8_exit_cnt; ··· 457 451 struct ufs_err_reg_hist dme_err; 458 452 459 453 /* fatal errors */ 460 - struct ufs_err_reg_hist fatal_err; 461 454 struct ufs_err_reg_hist auto_hibern8_err; 455 + struct ufs_err_reg_hist fatal_err; 456 + struct ufs_err_reg_hist link_startup_err; 457 + struct ufs_err_reg_hist resume_err; 458 + struct ufs_err_reg_hist suspend_err; 459 + 460 + /* abnormal events */ 461 + struct ufs_err_reg_hist dev_reset; 462 + struct ufs_err_reg_hist host_reset; 463 + struct ufs_err_reg_hist task_abort; 462 464 }; 463 465 464 466 /**