Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * UEFI Common Platform Error Record (CPER) support
4 *
5 * Copyright (C) 2010, Intel Corp.
6 * Author: Huang Ying <ying.huang@intel.com>
7 *
8 * CPER is the format used to describe platform hardware error by
9 * various tables, such as ERST, BERT and HEST etc.
10 *
11 * For more information about CPER, please refer to Appendix N of UEFI
12 * Specification version 2.4.
13 */
14
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/time.h>
18#include <linux/cper.h>
19#include <linux/dmi.h>
20#include <linux/acpi.h>
21#include <linux/pci.h>
22#include <linux/aer.h>
23#include <linux/printk.h>
24#include <linux/bcd.h>
25#include <acpi/ghes.h>
26#include <ras/ras_event.h>
27
/*
 * Scratch buffer that cper_mem_err_location() and cper_dimm_err_location()
 * format their text into when called from cper_mem_err_unpack() and
 * cper_print_mem().
 *
 * NOTE(review): the buffer is shared by every caller and no locking is
 * visible in this file - confirm that callers are serialized.
 */
static char rcd_decode_str[CPER_REC_LEN];
29
30/*
31 * CPER record ID need to be unique even after reboot, because record
32 * ID is used as index for ERST storage, while CPER records from
33 * multiple boot may co-exist in ERST.
34 */
35u64 cper_next_record_id(void)
36{
37 static atomic64_t seq;
38
39 if (!atomic64_read(&seq)) {
40 time64_t time = ktime_get_real_seconds();
41
42 /*
43 * This code is unlikely to still be needed in year 2106,
44 * but just in case, let's use a few more bits for timestamps
45 * after y2038 to be sure they keep increasing monotonically
46 * for the next few hundred years...
47 */
48 if (time < 0x80000000)
49 atomic64_set(&seq, (ktime_get_real_seconds()) << 32);
50 else
51 atomic64_set(&seq, 0x8000000000000000ull |
52 ktime_get_real_seconds() << 24);
53 }
54
55 return atomic64_inc_return(&seq);
56}
57EXPORT_SYMBOL_GPL(cper_next_record_id);
58
/* Printable names of the severity codes, indexed by the severity value. */
static const char * const severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};

/*
 * cper_severity_str - translate a severity code into a printable name
 * @severity: severity value to look up
 *
 * Return: the matching entry of severity_strs, or "unknown" when
 * @severity lies beyond the end of that table.
 */
const char *cper_severity_str(unsigned int severity)
{
	if (severity >= ARRAY_SIZE(severity_strs))
		return "unknown";

	return severity_strs[severity];
}
EXPORT_SYMBOL_GPL(cper_severity_str);
72
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position
 * @strs_size: size of the string array: @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs,
 * separating the strings with ", ".  NULL entries in @strs are skipped.
 * When appending another string would make the line longer than 80
 * characters, the current line is printed and a new one is started, with
 * @pfx printed at the beginning of each line.
 *
 * Entries of @strs at positions beyond the width of @bits can never be
 * selected and are ignored.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char * const strs[], unsigned int strs_size)
{
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * Stop at the width of @bits: shifting 1U by that many positions or
	 * more is undefined, and no such bit can be set in the mask anyway.
	 */
	for (i = 0; i < strs_size && i < 8 * sizeof(bits); i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush first if ", " plus this string would not fit. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
109
/* Processor type names, indexed by the proc_type field value. */
static const char * const proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
	[2] = "ARM",
};

/* Instruction set names, indexed by the proc_isa field value. */
static const char * const proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
	[3] = "ARM A32/T32",
	[4] = "ARM A64",
};

/* Processor error type names, indexed by bit position in the type mask. */
const char * const cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};

/* Operation names, indexed by the operation field value. */
static const char * const proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};

/* Processor flag names, indexed by bit position in the flags mask. */
static const char * const proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
144
145static void cper_print_proc_generic(const char *pfx,
146 const struct cper_sec_proc_generic *proc)
147{
148 if (proc->validation_bits & CPER_PROC_VALID_TYPE)
149 printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
150 proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
151 proc_type_strs[proc->proc_type] : "unknown");
152 if (proc->validation_bits & CPER_PROC_VALID_ISA)
153 printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
154 proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
155 proc_isa_strs[proc->proc_isa] : "unknown");
156 if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
157 printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
158 cper_print_bits(pfx, proc->proc_error_type,
159 cper_proc_error_type_strs,
160 ARRAY_SIZE(cper_proc_error_type_strs));
161 }
162 if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
163 printk("%s""operation: %d, %s\n", pfx, proc->operation,
164 proc->operation < ARRAY_SIZE(proc_op_strs) ?
165 proc_op_strs[proc->operation] : "unknown");
166 if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
167 printk("%s""flags: 0x%02x\n", pfx, proc->flags);
168 cper_print_bits(pfx, proc->flags, proc_flag_strs,
169 ARRAY_SIZE(proc_flag_strs));
170 }
171 if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
172 printk("%s""level: %d\n", pfx, proc->level);
173 if (proc->validation_bits & CPER_PROC_VALID_VERSION)
174 printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
175 if (proc->validation_bits & CPER_PROC_VALID_ID)
176 printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
177 if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS)
178 printk("%s""target_address: 0x%016llx\n",
179 pfx, proc->target_addr);
180 if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID)
181 printk("%s""requestor_id: 0x%016llx\n",
182 pfx, proc->requestor_id);
183 if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID)
184 printk("%s""responder_id: 0x%016llx\n",
185 pfx, proc->responder_id);
186 if (proc->validation_bits & CPER_PROC_VALID_IP)
187 printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
188}
189
/* Memory error type names, indexed by the error type value. */
static const char * const mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
	[15] = "physical memory map-out event",
};
208
209const char *cper_mem_err_type_str(unsigned int etype)
210{
211 return etype < ARRAY_SIZE(mem_err_type_strs) ?
212 mem_err_type_strs[etype] : "unknown";
213}
214EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
215
216static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
217{
218 u32 len, n;
219
220 if (!msg)
221 return 0;
222
223 n = 0;
224 len = CPER_REC_LEN - 1;
225 if (mem->validation_bits & CPER_MEM_VALID_NODE)
226 n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
227 if (mem->validation_bits & CPER_MEM_VALID_CARD)
228 n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
229 if (mem->validation_bits & CPER_MEM_VALID_MODULE)
230 n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
231 if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
232 n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
233 if (mem->validation_bits & CPER_MEM_VALID_BANK)
234 n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
235 if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
236 n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
237 if (mem->validation_bits & CPER_MEM_VALID_ROW)
238 n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
239 if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
240 n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
241 if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
242 n += scnprintf(msg + n, len - n, "bit_position: %d ",
243 mem->bit_pos);
244 if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
245 n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
246 mem->requestor_id);
247 if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
248 n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
249 mem->responder_id);
250 if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
251 scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
252 mem->target_id);
253
254 msg[n] = '\0';
255 return n;
256}
257
258static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
259{
260 u32 len, n;
261 const char *bank = NULL, *device = NULL;
262
263 if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
264 return 0;
265
266 n = 0;
267 len = CPER_REC_LEN - 1;
268 dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
269 if (bank && device)
270 n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
271 else
272 n = snprintf(msg, len,
273 "DIMM location: not present. DMI handle: 0x%.4x ",
274 mem->mem_dev_handle);
275
276 msg[n] = '\0';
277 return n;
278}
279
280void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
281 struct cper_mem_err_compact *cmem)
282{
283 cmem->validation_bits = mem->validation_bits;
284 cmem->node = mem->node;
285 cmem->card = mem->card;
286 cmem->module = mem->module;
287 cmem->bank = mem->bank;
288 cmem->device = mem->device;
289 cmem->row = mem->row;
290 cmem->column = mem->column;
291 cmem->bit_pos = mem->bit_pos;
292 cmem->requestor_id = mem->requestor_id;
293 cmem->responder_id = mem->responder_id;
294 cmem->target_id = mem->target_id;
295 cmem->rank = mem->rank;
296 cmem->mem_array_handle = mem->mem_array_handle;
297 cmem->mem_dev_handle = mem->mem_dev_handle;
298}
299
300const char *cper_mem_err_unpack(struct trace_seq *p,
301 struct cper_mem_err_compact *cmem)
302{
303 const char *ret = trace_seq_buffer_ptr(p);
304
305 if (cper_mem_err_location(cmem, rcd_decode_str))
306 trace_seq_printf(p, "%s", rcd_decode_str);
307 if (cper_dimm_err_location(cmem, rcd_decode_str))
308 trace_seq_printf(p, "%s", rcd_decode_str);
309 trace_seq_putc(p, '\0');
310
311 return ret;
312}
313
314static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
315 int len)
316{
317 struct cper_mem_err_compact cmem;
318
319 /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */
320 if (len == sizeof(struct cper_sec_mem_err_old) &&
321 (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) {
322 pr_err(FW_WARN "valid bits set for fields beyond structure\n");
323 return;
324 }
325 if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
326 printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
327 if (mem->validation_bits & CPER_MEM_VALID_PA)
328 printk("%s""physical_address: 0x%016llx\n",
329 pfx, mem->physical_addr);
330 if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
331 printk("%s""physical_address_mask: 0x%016llx\n",
332 pfx, mem->physical_addr_mask);
333 cper_mem_err_pack(mem, &cmem);
334 if (cper_mem_err_location(&cmem, rcd_decode_str))
335 printk("%s%s\n", pfx, rcd_decode_str);
336 if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
337 u8 etype = mem->error_type;
338 printk("%s""error_type: %d, %s\n", pfx, etype,
339 cper_mem_err_type_str(etype));
340 }
341 if (cper_dimm_err_location(&cmem, rcd_decode_str))
342 printk("%s%s\n", pfx, rcd_decode_str);
343}
344
/* PCIe port type names, indexed by the port_type field value. */
static const char * const pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
358
359static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
360 const struct acpi_hest_generic_data *gdata)
361{
362 if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
363 printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
364 pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
365 pcie_port_type_strs[pcie->port_type] : "unknown");
366 if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
367 printk("%s""version: %d.%d\n", pfx,
368 pcie->version.major, pcie->version.minor);
369 if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS)
370 printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
371 pcie->command, pcie->status);
372 if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) {
373 const __u8 *p;
374 printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
375 pcie->device_id.segment, pcie->device_id.bus,
376 pcie->device_id.device, pcie->device_id.function);
377 printk("%s""slot: %d\n", pfx,
378 pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
379 printk("%s""secondary_bus: 0x%02x\n", pfx,
380 pcie->device_id.secondary_bus);
381 printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
382 pcie->device_id.vendor_id, pcie->device_id.device_id);
383 p = pcie->device_id.class_code;
384 printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
385 }
386 if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
387 printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
388 pcie->serial_number.lower, pcie->serial_number.upper);
389 if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS)
390 printk(
391 "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
392 pfx, pcie->bridge.secondary_status, pcie->bridge.control);
393}
394
395static void cper_print_tstamp(const char *pfx,
396 struct acpi_hest_generic_data_v300 *gdata)
397{
398 __u8 hour, min, sec, day, mon, year, century, *timestamp;
399
400 if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
401 timestamp = (__u8 *)&(gdata->time_stamp);
402 sec = bcd2bin(timestamp[0]);
403 min = bcd2bin(timestamp[1]);
404 hour = bcd2bin(timestamp[2]);
405 day = bcd2bin(timestamp[4]);
406 mon = bcd2bin(timestamp[5]);
407 year = bcd2bin(timestamp[6]);
408 century = bcd2bin(timestamp[7]);
409
410 printk("%s%ststamp: %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
411 (timestamp[3] & 0x1 ? "precise " : "imprecise "),
412 century, year, mon, day, hour, min, sec);
413 }
414}
415
416static void
417cper_estatus_print_section(const char *pfx, struct acpi_hest_generic_data *gdata,
418 int sec_no)
419{
420 guid_t *sec_type = (guid_t *)gdata->section_type;
421 __u16 severity;
422 char newpfx[64];
423
424 if (acpi_hest_get_version(gdata) >= 3)
425 cper_print_tstamp(pfx, (struct acpi_hest_generic_data_v300 *)gdata);
426
427 severity = gdata->error_severity;
428 printk("%s""Error %d, type: %s\n", pfx, sec_no,
429 cper_severity_str(severity));
430 if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
431 printk("%s""fru_id: %pUl\n", pfx, gdata->fru_id);
432 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
433 printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
434
435 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
436 if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
437 struct cper_sec_proc_generic *proc_err = acpi_hest_get_payload(gdata);
438
439 printk("%s""section_type: general processor error\n", newpfx);
440 if (gdata->error_data_length >= sizeof(*proc_err))
441 cper_print_proc_generic(newpfx, proc_err);
442 else
443 goto err_section_too_small;
444 } else if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
445 struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
446
447 printk("%s""section_type: memory error\n", newpfx);
448 if (gdata->error_data_length >=
449 sizeof(struct cper_sec_mem_err_old))
450 cper_print_mem(newpfx, mem_err,
451 gdata->error_data_length);
452 else
453 goto err_section_too_small;
454 } else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
455 struct cper_sec_pcie *pcie = acpi_hest_get_payload(gdata);
456
457 printk("%s""section_type: PCIe error\n", newpfx);
458 if (gdata->error_data_length >= sizeof(*pcie))
459 cper_print_pcie(newpfx, pcie, gdata);
460 else
461 goto err_section_too_small;
462#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
463 } else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
464 struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);
465
466 printk("%ssection_type: ARM processor error\n", newpfx);
467 if (gdata->error_data_length >= sizeof(*arm_err))
468 cper_print_proc_arm(newpfx, arm_err);
469 else
470 goto err_section_too_small;
471#endif
472#if defined(CONFIG_UEFI_CPER_X86)
473 } else if (guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
474 struct cper_sec_proc_ia *ia_err = acpi_hest_get_payload(gdata);
475
476 printk("%ssection_type: IA32/X64 processor error\n", newpfx);
477 if (gdata->error_data_length >= sizeof(*ia_err))
478 cper_print_proc_ia(newpfx, ia_err);
479 else
480 goto err_section_too_small;
481#endif
482 } else {
483 const void *err = acpi_hest_get_payload(gdata);
484
485 printk("%ssection type: unknown, %pUl\n", newpfx, sec_type);
486 printk("%ssection length: %#x\n", newpfx,
487 gdata->error_data_length);
488 print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, err,
489 gdata->error_data_length, true);
490 }
491
492 return;
493
494err_section_too_small:
495 pr_err(FW_WARN "error section length is too small\n");
496}
497
498void cper_estatus_print(const char *pfx,
499 const struct acpi_hest_generic_status *estatus)
500{
501 struct acpi_hest_generic_data *gdata;
502 int sec_no = 0;
503 char newpfx[64];
504 __u16 severity;
505
506 severity = estatus->error_severity;
507 if (severity == CPER_SEV_CORRECTED)
508 printk("%s%s\n", pfx,
509 "It has been corrected by h/w "
510 "and requires no further action");
511 printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
512 snprintf(newpfx, sizeof(newpfx), "%s ", pfx);
513
514 apei_estatus_for_each_section(estatus, gdata) {
515 cper_estatus_print_section(newpfx, gdata, sec_no);
516 sec_no++;
517 }
518}
519EXPORT_SYMBOL_GPL(cper_estatus_print);
520
521int cper_estatus_check_header(const struct acpi_hest_generic_status *estatus)
522{
523 if (estatus->data_length &&
524 estatus->data_length < sizeof(struct acpi_hest_generic_data))
525 return -EINVAL;
526 if (estatus->raw_data_length &&
527 estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
528 return -EINVAL;
529
530 return 0;
531}
532EXPORT_SYMBOL_GPL(cper_estatus_check_header);
533
534int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
535{
536 struct acpi_hest_generic_data *gdata;
537 unsigned int data_len, record_size;
538 int rc;
539
540 rc = cper_estatus_check_header(estatus);
541 if (rc)
542 return rc;
543
544 data_len = estatus->data_length;
545
546 apei_estatus_for_each_section(estatus, gdata) {
547 if (sizeof(struct acpi_hest_generic_data) > data_len)
548 return -EINVAL;
549
550 record_size = acpi_hest_get_record_size(gdata);
551 if (record_size > data_len)
552 return -EINVAL;
553
554 data_len -= record_size;
555 }
556 if (data_len)
557 return -EINVAL;
558
559 return 0;
560}
561EXPORT_SYMBOL_GPL(cper_estatus_check);