Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ghes_edac: Register at EDAC core the BIOS report

Register GHES with the EDAC MC core, in order to prevent other
drivers from also handling the errors and mangling the error data.

The EDAC core guarantees that just one driver will be used,
so the first one to register (BIOS first) will be the one that
reports the hardware errors.

For now, the EDAC driver does nothing but register with the
EDAC core, preventing the hardware-driven mechanism from
interfering with GHES.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>

+145
+7
MAINTAINERS
··· 2803 2803 S: Maintained 2804 2804 F: drivers/edac/e7xxx_edac.c 2805 2805 2806 + EDAC-GHES 2807 + M: Mauro Carvalho Chehab <mchehab@redhat.com> 2808 + L: linux-edac@vger.kernel.org 2809 + W: bluesmoke.sourceforge.net 2810 + S: Maintained 2811 + F: drivers/edac/ghes_edac.c 2812 + 2806 2813 EDAC-I82443BXGX 2807 2814 M: Tim Small <tim@buttersideup.com> 2808 2815 L: linux-edac@vger.kernel.org
+23
drivers/edac/Kconfig
··· 80 80 occurred so that a particular failing memory module can be 81 81 replaced. If unsure, select 'Y'. 82 82 83 + config EDAC_GHES 84 + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" 85 + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) 86 + default y 87 + help 88 + Not all machines support hardware-driven error reporting. Some of those 89 + provide a BIOS-driven error reporting mechanism via ACPI, using the 90 + APEI/GHES driver. By enabling this option, the error reports provided 91 + by GHES are sent to userspace via the EDAC API. 92 + 93 + When this option is enabled, it will disable the hardware-driven 94 + mechanisms, if a GHES BIOS is detected, entering into the 95 + "Firmware First" mode. 96 + 97 + It should be noted that keeping both GHES and a hardware-driven 98 + error mechanism won't work well, as the BIOS will race with the OS while 99 + reading the error registers. So, if you do not want to use the "Firmware 100 + First" GHES error mechanism, you should disable GHES either at 101 + compilation time or by passing the "ghes.disable=1" kernel parameter 102 + at boot time. 103 + 104 + If in doubt, say 'Y'. 105 + 83 106 config EDAC_AMD64 84 107 tristate "AMD64 (Opteron, Athlon64) K8, F10h" 85 108 depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
+1
drivers/edac/Makefile
··· 16 16 edac_core-y += edac_pci.o edac_pci_sysfs.o 17 17 endif 18 18 19 + obj-$(CONFIG_EDAC_GHES) += ghes_edac.o 19 20 obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o 20 21 21 22 edac_mce_amd-y := mce_amd.o
+114
drivers/edac/ghes_edac.c
··· 1 + /* 2 + * GHES/EDAC Linux driver 3 + * 4 + * This file may be distributed under the terms of the GNU General Public 5 + * License version 2. 6 + * 7 + * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> 8 + * 9 + * Red Hat Inc. http://www.redhat.com 10 + */ 11 + 12 + #include <acpi/ghes.h> 13 + #include <linux/edac.h> 14 + #include "edac_core.h" 15 + 16 + #define GHES_PFX "ghes_edac: " 17 + #define GHES_EDAC_REVISION " Ver: 1.0.0" 18 + 19 + struct ghes_edac_pvt { 20 + struct list_head list; 21 + struct ghes *ghes; 22 + struct mem_ctl_info *mci; 23 + }; 24 + 25 + static LIST_HEAD(ghes_reglist); 26 + static DEFINE_MUTEX(ghes_edac_lock); 27 + static int ghes_edac_mc_num; 28 + 29 + void ghes_edac_report_mem_error(struct ghes *ghes, int sev, 30 + struct cper_sec_mem_err *mem_err) 31 + { 32 + } 33 + EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); 34 + 35 + int ghes_edac_register(struct ghes *ghes, struct device *dev) 36 + { 37 + int rc; 38 + struct mem_ctl_info *mci; 39 + struct edac_mc_layer layers[1]; 40 + struct csrow_info *csrow; 41 + struct dimm_info *dimm; 42 + struct ghes_edac_pvt *pvt; 43 + 44 + layers[0].type = EDAC_MC_LAYER_ALL_MEM; 45 + layers[0].size = 1; 46 + layers[0].is_virt_csrow = true; 47 + 48 + /* 49 + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), 50 + * to avoid duplicated memory controller numbers 51 + */ 52 + mutex_lock(&ghes_edac_lock); 53 + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, 54 + sizeof(*pvt)); 55 + if (!mci) { 56 + pr_info(GHES_PFX "Can't allocate memory for EDAC data\n"); 57 + mutex_unlock(&ghes_edac_lock); 58 + return -ENOMEM; 59 + } 60 + 61 + pvt = mci->pvt_info; 62 + memset(pvt, 0, sizeof(*pvt)); 63 + list_add_tail(&pvt->list, &ghes_reglist); 64 + pvt->ghes = ghes; 65 + pvt->mci = mci; 66 + mci->pdev = dev; 67 + 68 + mci->mtype_cap = MEM_FLAG_EMPTY; 69 + mci->edac_ctl_cap = EDAC_FLAG_NONE; 70 + mci->edac_cap = EDAC_FLAG_NONE; 71 + mci->mod_name = "ghes_edac.c"; 72 + 
mci->mod_ver = GHES_EDAC_REVISION; 73 + mci->ctl_name = "ghes_edac"; 74 + mci->dev_name = "ghes"; 75 + 76 + csrow = mci->csrows[0]; 77 + dimm = csrow->channels[0]->dimm; 78 + 79 + /* FIXME: FAKE DATA */ 80 + dimm->nr_pages = 1000; 81 + dimm->grain = 128; 82 + dimm->mtype = MEM_UNKNOWN; 83 + dimm->dtype = DEV_UNKNOWN; 84 + dimm->edac_mode = EDAC_SECDED; 85 + 86 + rc = edac_mc_add_mc(mci); 87 + if (rc < 0) { 88 + pr_info(GHES_PFX "Can't register at EDAC core\n"); 89 + edac_mc_free(mci); 90 + mutex_unlock(&ghes_edac_lock); 91 + return -ENODEV; 92 + } 93 + 94 + ghes_edac_mc_num++; 95 + mutex_unlock(&ghes_edac_lock); 96 + return 0; 97 + } 98 + EXPORT_SYMBOL_GPL(ghes_edac_register); 99 + 100 + void ghes_edac_unregister(struct ghes *ghes) 101 + { 102 + struct mem_ctl_info *mci; 103 + struct ghes_edac_pvt *pvt; 104 + 105 + list_for_each_entry(pvt, &ghes_reglist, list) { 106 + if (ghes == pvt->ghes) { 107 + mci = pvt->mci; 108 + edac_mc_del_mc(mci->pdev); 109 + edac_mc_free(mci); 110 + list_del(&pvt->list); 111 + } 112 + } 113 + } 114 + EXPORT_SYMBOL_GPL(ghes_edac_unregister);