Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ACPI, APEI, CPER: Cleanup CPER memory error output format

Memory error reporting is much too verbose. Most users do not care about
the DIMM internal bank/row/column information. Downgrade the fine details
to "pr_debug" status so that those few who do care can get them if they
really want to. The detailed information will later be provided by
the perf/trace interface.
Since things are still a bit scary, and users are sometimes overly
nervous, provide a reassuring message that corrected errors do not
generally require any further action.

Suggested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Reviewed-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

authored by

Chen, Gong and committed by
Tony Luck
f6edea77 fbeef85f

+31 -36
+31 -36
drivers/acpi/apei/cper.c
··· 33 33 #include <linux/pci.h> 34 34 #include <linux/aer.h> 35 35 36 + #define INDENT_SP " " 36 37 /* 37 38 * CPER record ID need to be unique even after reboot, because record 38 39 * ID is used as index for ERST storage, while CPER records from ··· 207 206 printk("%s""physical_address_mask: 0x%016llx\n", 208 207 pfx, mem->physical_addr_mask); 209 208 if (mem->validation_bits & CPER_MEM_VALID_NODE) 210 - printk("%s""node: %d\n", pfx, mem->node); 209 + pr_debug("node: %d\n", mem->node); 211 210 if (mem->validation_bits & CPER_MEM_VALID_CARD) 212 - printk("%s""card: %d\n", pfx, mem->card); 211 + pr_debug("card: %d\n", mem->card); 213 212 if (mem->validation_bits & CPER_MEM_VALID_MODULE) 214 - printk("%s""module: %d\n", pfx, mem->module); 213 + pr_debug("module: %d\n", mem->module); 215 214 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) 216 - printk("%s""rank: %d\n", pfx, mem->rank); 215 + pr_debug("rank: %d\n", mem->rank); 217 216 if (mem->validation_bits & CPER_MEM_VALID_BANK) 218 - printk("%s""bank: %d\n", pfx, mem->bank); 217 + pr_debug("bank: %d\n", mem->bank); 219 218 if (mem->validation_bits & CPER_MEM_VALID_DEVICE) 220 - printk("%s""device: %d\n", pfx, mem->device); 219 + pr_debug("device: %d\n", mem->device); 221 220 if (mem->validation_bits & CPER_MEM_VALID_ROW) 222 - printk("%s""row: %d\n", pfx, mem->row); 221 + pr_debug("row: %d\n", mem->row); 223 222 if (mem->validation_bits & CPER_MEM_VALID_COLUMN) 224 - printk("%s""column: %d\n", pfx, mem->column); 223 + pr_debug("column: %d\n", mem->column); 225 224 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) 226 - printk("%s""bit_position: %d\n", pfx, mem->bit_pos); 225 + pr_debug("bit_position: %d\n", mem->bit_pos); 227 226 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) 228 - printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id); 227 + pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); 229 228 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) 230 - 
printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id); 229 + pr_debug("responder_id: 0x%016llx\n", mem->responder_id); 231 230 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) 232 - printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id); 231 + pr_debug("target_id: 0x%016llx\n", mem->target_id); 233 232 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { 234 233 u8 etype = mem->error_type; 235 234 printk("%s""error_type: %d, %s\n", pfx, etype, ··· 297 296 pfx, pcie->bridge.secondary_status, pcie->bridge.control); 298 297 } 299 298 300 - static const char * const cper_estatus_section_flag_strs[] = { 301 - "primary", 302 - "containment warning", 303 - "reset", 304 - "error threshold exceeded", 305 - "resource not accessible", 306 - "latent error", 307 - }; 308 - 309 299 static void cper_estatus_print_section( 310 300 const char *pfx, const struct acpi_generic_data *gdata, int sec_no) 311 301 { 312 302 uuid_le *sec_type = (uuid_le *)gdata->section_type; 313 303 __u16 severity; 304 + char newpfx[64]; 314 305 315 306 severity = gdata->error_severity; 316 - printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity, 307 + printk("%s""Error %d, type: %s\n", pfx, sec_no, 317 308 cper_severity_str(severity)); 318 - printk("%s""flags: 0x%02x\n", pfx, gdata->flags); 319 - cper_print_bits(pfx, gdata->flags, cper_estatus_section_flag_strs, 320 - ARRAY_SIZE(cper_estatus_section_flag_strs)); 321 309 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) 322 310 printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id); 323 311 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) 324 312 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); 325 313 314 + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); 326 315 if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) { 327 316 struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1); 328 - printk("%s""section_type: general processor error\n", pfx); 317 + printk("%s""section_type: general 
processor error\n", newpfx); 329 318 if (gdata->error_data_length >= sizeof(*proc_err)) 330 - cper_print_proc_generic(pfx, proc_err); 319 + cper_print_proc_generic(newpfx, proc_err); 331 320 else 332 321 goto err_section_too_small; 333 322 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { 334 323 struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); 335 - printk("%s""section_type: memory error\n", pfx); 324 + printk("%s""section_type: memory error\n", newpfx); 336 325 if (gdata->error_data_length >= sizeof(*mem_err)) 337 - cper_print_mem(pfx, mem_err); 326 + cper_print_mem(newpfx, mem_err); 338 327 else 339 328 goto err_section_too_small; 340 329 } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { 341 330 struct cper_sec_pcie *pcie = (void *)(gdata + 1); 342 - printk("%s""section_type: PCIe error\n", pfx); 331 + printk("%s""section_type: PCIe error\n", newpfx); 343 332 if (gdata->error_data_length >= sizeof(*pcie)) 344 - cper_print_pcie(pfx, pcie, gdata); 333 + cper_print_pcie(newpfx, pcie, gdata); 345 334 else 346 335 goto err_section_too_small; 347 336 } else 348 - printk("%s""section type: unknown, %pUl\n", pfx, sec_type); 337 + printk("%s""section type: unknown, %pUl\n", newpfx, sec_type); 349 338 350 339 return; 351 340 ··· 349 358 struct acpi_generic_data *gdata; 350 359 unsigned int data_len, gedata_len; 351 360 int sec_no = 0; 361 + char newpfx[64]; 352 362 __u16 severity; 353 363 354 - printk("%s""Generic Hardware Error Status\n", pfx); 355 364 severity = estatus->error_severity; 356 - printk("%s""severity: %d, %s\n", pfx, severity, 357 - cper_severity_str(severity)); 365 + if (severity == CPER_SEV_CORRECTED) 366 + printk("%s%s\n", pfx, 367 + "It has been corrected by h/w " 368 + "and requires no further action"); 369 + printk("%s""event severity: %s\n", pfx, cper_severity_str(severity)); 358 370 data_len = estatus->data_length; 359 371 gdata = (struct acpi_generic_data *)(estatus + 1); 372 + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, 
INDENT_SP); 360 373 while (data_len >= sizeof(*gdata)) { 361 374 gedata_len = gdata->error_data_length; 362 - cper_estatus_print_section(pfx, gdata, sec_no); 375 + cper_estatus_print_section(newpfx, gdata, sec_no); 363 376 data_len -= gedata_len + sizeof(*gdata); 364 377 gdata = (void *)(gdata + 1) + gedata_len; 365 378 sec_no++;