Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu: remove check for CE in RAS error address query

Only RAS UE error address is queried currently, no need to check CE status.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Tao Zhou and committed by Alex Deucher
cdbb816b faf4d8e0

+55 -86
+3 -7
drivers/gpu/drm/amd/amdgpu/umc_v6_1.c
··· 327 327 return; 328 328 } 329 329 330 - /* calculate error address if ue/ce error is detected */ 330 + /* calculate error address if ue error is detected */ 331 331 if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 332 - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 333 - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { 332 + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { 334 333 335 334 err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); 336 335 /* the lowest lsb bits should be ignored */ ··· 342 343 ADDR_OF_256B_BLOCK(channel_index) | 343 344 OFFSET_IN_256B_BLOCK(err_addr); 344 345 345 - /* we only save ue error information currently, ce is skipped */ 346 - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) 347 - == 1) 348 - amdgpu_umc_fill_error_record(err_data, err_addr, 346 + amdgpu_umc_fill_error_record(err_data, err_addr, 349 347 retired_page, channel_index, umc_inst); 350 348 } 351 349
+26 -37
drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
··· 209 209 if (!err_data->err_addr) 210 210 return; 211 211 212 - /* calculate error address if ue/ce error is detected */ 212 + /* calculate error address if ue error is detected */ 213 213 if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 214 - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 215 - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { 214 + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { 216 215 217 216 err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; 218 217 err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); ··· 227 228 /* clear [C4 C3 C2] in soc physical address */ 228 229 soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); 229 230 230 - /* we only save ue error information currently, ce is skipped */ 231 - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) 232 - == 1) { 233 - /* loop for all possibilities of [C4 C3 C2] */ 234 - for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { 235 - retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); 236 - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 237 - amdgpu_umc_fill_error_record(err_data, err_addr, 238 - retired_page, channel_index, umc_inst); 231 + /* loop for all possibilities of [C4 C3 C2] */ 232 + for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { 233 + retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); 234 + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 235 + amdgpu_umc_fill_error_record(err_data, err_addr, 236 + retired_page, channel_index, umc_inst); 239 237
240 - /* shift R14 bit */ 241 - retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); 242 - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 243 - amdgpu_umc_fill_error_record(err_data, err_addr, 244 - retired_page, channel_index, umc_inst); 245 - } 238 + /* shift R14 bit */ 239 + retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); 240 + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 241 + amdgpu_umc_fill_error_record(err_data, err_addr, 242 + retired_page, channel_index, umc_inst); 246 243 } 247 244 } 248 245 }
··· 476 481 channel_index = 477 482 adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; 478 483 479 - /* calculate error address if ue/ce error is detected */ 484 + /* calculate error address if ue error is detected */ 480 485 if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 481 - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 482 - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) || 486 + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) || 483 487 mca_addr != UMC_INVALID_ADDR) { 484 488 if (mca_addr == UMC_INVALID_ADDR) { 485 489 err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); ··· 499 505 /* clear [C4 C3 C2] in soc physical address */ 500 506 soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); 501 507 502 - /* we only save ue error information currently, ce is skipped */ 503 - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) 504 - == 1 || 505 - mca_addr != UMC_INVALID_ADDR) { 506 - /* loop for all possibilities of [C4 C3 C2] */ 507 - for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { 508 - retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); 509 - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 510 - amdgpu_umc_fill_error_record(err_data, err_addr, 511 - retired_page, channel_index, umc_inst); 508 + /* loop for all possibilities of [C4 C3 C2] */ 509 + for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { 510 + retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); 511 + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 512 + amdgpu_umc_fill_error_record(err_data, err_addr, 513 + retired_page, channel_index, umc_inst); 512 514
513 - /* shift R14 bit */ 514 - retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); 515 - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 516 - amdgpu_umc_fill_error_record(err_data, err_addr, 517 - retired_page, channel_index, umc_inst); 518 - } 515 + /* shift R14 bit */ 516 + retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); 517 + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); 518 + amdgpu_umc_fill_error_record(err_data, err_addr, 519 + retired_page, channel_index, umc_inst); 519 520 } 520 521 } 521 522
+20 -28
drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
··· 208 208 { 209 209 uint64_t mc_umc_status_addr; 210 210 uint64_t mc_umc_status, err_addr; 211 - uint32_t channel_index; 211 + uint64_t mc_umc_addrt0, na_err_addr_base; 212 + uint64_t na_err_addr, retired_page_addr; 213 + uint32_t channel_index, addr_lsb, col = 0; 214 + int ret = 0; 212 215 213 216 mc_umc_status_addr = 214 217 SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); ··· 232 229 umc_inst * adev->umc.channel_inst_num + 233 230 ch_inst]; 234 231 235 - /* calculate error address if ue/ce error is detected */ 232 + /* calculate error address if ue error is detected */ 236 233 if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 237 234 REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && 238 - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 239 - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { 240 - uint32_t addr_lsb; 241 - uint64_t mc_umc_addrt0; 235 + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { 242 236 243 237 mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); 244 238 err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); ··· 243 243 244 244 /* the lowest lsb bits should be ignored */ 245 245 addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb); 246 - 247 246 err_addr &= ~((0x1ULL << addr_lsb) - 1); 247 + na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); 248 248 249 - /* we only save ue error information currently, ce is skipped */ 250 - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { 251 - uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT); 252 - uint64_t na_err_addr, retired_page_addr; 253 - uint32_t col = 0; 254 - int ret = 0; 249 + /* loop for all possibilities of [C6 C5] in normal address. 
*/ 250 + for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { 251 + na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); 255 252 256 - /* loop for all possibilities of [C6 C5] in normal address. */ 257 - for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) { 258 - na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT); 259 - 260 - /* Mapping normal error address to retired soc physical address. */ 261 - ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, 262 - na_err_addr, &retired_page_addr); 263 - if (ret) { 264 - dev_err(adev->dev, "Failed to map pa from umc na.\n"); 265 - break; 266 - } 267 - dev_info(adev->dev, "Error Address(PA): 0x%llx\n", 268 - retired_page_addr); 269 - amdgpu_umc_fill_error_record(err_data, na_err_addr, 270 - retired_page_addr, channel_index, umc_inst); 253 + /* Mapping normal error address to retired soc physical address. */ 254 + ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index, 255 + na_err_addr, &retired_page_addr); 256 + if (ret) { 257 + dev_err(adev->dev, "Failed to map pa from umc na.\n"); 258 + break; 271 259 } 260 + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", 261 + retired_page_addr); 262 + amdgpu_umc_fill_error_record(err_data, na_err_addr, 263 + retired_page_addr, channel_index, umc_inst); 272 264 } 273 265 } 274 266
+6 -14
drivers/gpu/drm/amd/amdgpu/umc_v8_7.c
··· 130 130 if (!err_data->err_addr) 131 131 return; 132 132 133 - /* calculate error address if ue/ce error is detected */ 133 + /* calculate error address if ue error is detected */ 134 134 if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 135 - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 136 - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { 135 + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { 137 136 138 137 err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; 139 138 err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); ··· 142 143 ADDR_OF_256B_BLOCK(channel_index) | 143 144 OFFSET_IN_256B_BLOCK(err_addr); 144 145 145 - /* we only save ue error information currently, ce is skipped */ 146 - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) 147 - == 1) 148 - amdgpu_umc_fill_error_record(err_data, err_addr, 146 + amdgpu_umc_fill_error_record(err_data, err_addr, 149 147 retired_page, channel_index, umc_inst); 150 148 } 151 149 } ··· 339 343 return; 340 344 } 341 345 342 - /* calculate error address if ue/ce error is detected */ 346 + /* calculate error address if ue error is detected */ 343 347 if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && 344 - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || 345 - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { 348 + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { 346 349 347 350 err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); 348 351 /* the lowest lsb bits should be ignored */ ··· 354 359 ADDR_OF_256B_BLOCK(channel_index) | 355 360 OFFSET_IN_256B_BLOCK(err_addr); 356 361 357 - /* we only save ue error information currently, ce is skipped */ 358 - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) 359 - == 1)
360 - amdgpu_umc_fill_error_record(err_data, err_addr, 362 + amdgpu_umc_fill_error_record(err_data, err_addr, 361 363 retired_page, channel_index, umc_inst); 362 364 } 363 365