Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: store only one RAS bad page record for all pages in one row

This saves EEPROM space while remaining compatible with the legacy mode, in which every bad page gets its own record.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

authored by

Tao Zhou and committed by
Alex Deucher
c3d4acf0 e1ee2111

+27 -8
+27 -8
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
··· 2849 2849 struct amdgpu_ras *con = amdgpu_ras_get_context(adev); 2850 2850 struct ras_err_handler_data *data; 2851 2851 struct amdgpu_ras_eeprom_control *control; 2852 - int save_count; 2852 + int save_count, unit_num, bad_page_num, i; 2853 2853 2854 2854 if (!con || !con->eh_data) { 2855 2855 if (new_cnt) ··· 2861 2861 mutex_lock(&con->recovery_lock); 2862 2862 control = &con->eeprom_control; 2863 2863 data = con->eh_data; 2864 - save_count = data->count - control->ras_num_recs; 2864 + bad_page_num = control->ras_num_recs; 2865 + /* one record on eeprom stands for all pages in one memory row 2866 + * in this mode 2867 + */ 2868 + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) 2869 + bad_page_num = control->ras_num_recs * adev->umc.retire_unit; 2870 + 2871 + save_count = data->count - bad_page_num; 2865 2872 mutex_unlock(&con->recovery_lock); 2866 2873 2874 + unit_num = save_count / adev->umc.retire_unit; 2867 2875 if (new_cnt) 2868 - *new_cnt = save_count / adev->umc.retire_unit; 2876 + *new_cnt = unit_num; 2869 2877 2870 2878 /* only new entries are saved */ 2871 2879 if (save_count > 0) { 2872 - if (amdgpu_ras_eeprom_append(control, 2873 - &data->bps[control->ras_num_recs], 2874 - save_count)) { 2875 - dev_err(adev->dev, "Failed to save EEPROM table data!"); 2876 - return -EIO; 2880 + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) { 2881 + if (amdgpu_ras_eeprom_append(control, 2882 + &data->bps[control->ras_num_recs], 2883 + save_count)) { 2884 + dev_err(adev->dev, "Failed to save EEPROM table data!"); 2885 + return -EIO; 2886 + } 2887 + } else { 2888 + for (i = 0; i < unit_num; i++) { 2889 + if (amdgpu_ras_eeprom_append(control, 2890 + &data->bps[bad_page_num + i * adev->umc.retire_unit], 2891 + 1)) { 2892 + dev_err(adev->dev, "Failed to save EEPROM table data!"); 2893 + return -EIO; 2894 + } 2895 + } 2877 2896 } 2878 2897 2879 2898 dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);