Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ACPI: APEI: Fix AER info corruption when error status data has multiple sections

ghes_handle_aer() passes AER data to the PCI core for logging and
recovery by calling aer_recover_queue() with a pointer to struct
aer_capability_regs.

The problem was that aer_recover_queue() queued the pointer directly
without copying the aer_capability_regs data. The pointer referred to
data in the ghes->estatus buffer, which could be reused before
aer_recover_work_func() read the data, for example when the error
status data has multiple sections.

To avoid this problem, allocate a new aer_capability_regs structure
from the ghes_estatus_pool, copy the AER data from the ghes->estatus
buffer into it, pass a pointer to the new struct to
aer_recover_queue(), and free it after aer_recover_work_func() has
processed it.

Reported-by: Bjorn Helgaas <helgaas@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
[ rjw: Subject edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Authored by Shiju Jose and committed by Rafael J. Wysocki
e2abc47a ce9ecca0

+36 -1
+22 -1
drivers/acpi/apei/ghes.c
··· 209 209 return -ENOMEM; 210 210 } 211 211 212 + /** 213 + * ghes_estatus_pool_region_free - free previously allocated memory 214 + * from the ghes_estatus_pool. 215 + * @addr: address of memory to free. 216 + * @size: size of memory to free. 217 + * 218 + * Returns none. 219 + */ 220 + void ghes_estatus_pool_region_free(unsigned long addr, u32 size) 221 + { 222 + gen_pool_free(ghes_estatus_pool, addr, size); 223 + } 224 + EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free); 225 + 212 226 static int map_gen_v2(struct ghes *ghes) 213 227 { 214 228 return apei_map_generic_address(&ghes->generic_v2->read_ack_register); ··· 578 564 pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) { 579 565 unsigned int devfn; 580 566 int aer_severity; 567 + u8 *aer_info; 581 568 582 569 devfn = PCI_DEVFN(pcie_err->device_id.device, 583 570 pcie_err->device_id.function); ··· 592 577 if (gdata->flags & CPER_SEC_RESET) 593 578 aer_severity = AER_FATAL; 594 579 580 + aer_info = (void *)gen_pool_alloc(ghes_estatus_pool, 581 + sizeof(struct aer_capability_regs)); 582 + if (!aer_info) 583 + return; 584 + memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs)); 585 + 595 586 aer_recover_queue(pcie_err->device_id.segment, 596 587 pcie_err->device_id.bus, 597 588 devfn, aer_severity, 598 589 (struct aer_capability_regs *) 599 - pcie_err->aer_info); 590 + aer_info); 600 591 } 601 592 #endif 602 593 }
+10
drivers/pci/pcie/aer.c
··· 29 29 #include <linux/kfifo.h> 30 30 #include <linux/slab.h> 31 31 #include <acpi/apei.h> 32 + #include <acpi/ghes.h> 32 33 #include <ras/ras_event.h> 33 34 34 35 #include "../pci.h" ··· 997 996 continue; 998 997 } 999 998 cper_print_aer(pdev, entry.severity, entry.regs); 999 + /* 1000 + * Memory for aer_capability_regs(entry.regs) is being allocated from the 1001 + * ghes_estatus_pool to protect it from overwriting when multiple sections 1002 + * are present in the error status. Thus free the same after processing 1003 + * the data. 1004 + */ 1005 + ghes_estatus_pool_region_free((unsigned long)entry.regs, 1006 + sizeof(struct aer_capability_regs)); 1007 + 1000 1008 if (entry.severity == AER_NONFATAL) 1001 1009 pcie_do_recovery(pdev, pci_channel_io_normal, 1002 1010 aer_root_reset);
+4
include/acpi/ghes.h
··· 73 73 void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); 74 74 75 75 struct list_head *ghes_get_devices(void); 76 + 77 + void ghes_estatus_pool_region_free(unsigned long addr, u32 size); 76 78 #else 77 79 static inline struct list_head *ghes_get_devices(void) { return NULL; } 80 + 81 + static inline void ghes_estatus_pool_region_free(unsigned long addr, u32 size) { return; } 78 82 #endif 79 83 80 84 int ghes_estatus_pool_init(unsigned int num_ghes);