at master 5.3 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Cortex A72 EDAC L1 and L2 cache error detection 4 * 5 * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de> 6 * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com> 7 * 8 * Based on Code from: 9 * Copyright (c) 2018, NXP Semiconductor 10 * Author: York Sun <york.sun@nxp.com> 11 */ 12 13#include <linux/module.h> 14#include <linux/of.h> 15#include <linux/bitfield.h> 16#include <asm/smp_plat.h> 17 18#include "edac_module.h" 19 20#define DRVNAME "a72-edac" 21 22#define SYS_CPUMERRSR_EL1 sys_reg(3, 1, 15, 2, 2) 23#define SYS_L2MERRSR_EL1 sys_reg(3, 1, 15, 2, 3) 24 25#define CPUMERRSR_EL1_RAMID GENMASK(30, 24) 26#define L2MERRSR_EL1_CPUID_WAY GENMASK(21, 18) 27 28#define CPUMERRSR_EL1_VALID BIT(31) 29#define CPUMERRSR_EL1_FATAL BIT(63) 30#define L2MERRSR_EL1_VALID BIT(31) 31#define L2MERRSR_EL1_FATAL BIT(63) 32 33#define L1_I_TAG_RAM 0x00 34#define L1_I_DATA_RAM 0x01 35#define L1_D_TAG_RAM 0x08 36#define L1_D_DATA_RAM 0x09 37#define TLB_RAM 0x18 38 39#define MESSAGE_SIZE 64 40 41struct mem_err_synd_reg { 42 u64 cpu_mesr; 43 u64 l2_mesr; 44}; 45 46static struct cpumask compat_mask; 47 48static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu, 49 struct mem_err_synd_reg *mesr) 50{ 51 u64 cpu_mesr = mesr->cpu_mesr; 52 u64 l2_mesr = mesr->l2_mesr; 53 char msg[MESSAGE_SIZE]; 54 55 if (cpu_mesr & CPUMERRSR_EL1_VALID) { 56 const char *str; 57 bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL; 58 59 switch (FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) { 60 case L1_I_TAG_RAM: 61 str = "L1-I Tag RAM"; 62 break; 63 case L1_I_DATA_RAM: 64 str = "L1-I Data RAM"; 65 break; 66 case L1_D_TAG_RAM: 67 str = "L1-D Tag RAM"; 68 break; 69 case L1_D_DATA_RAM: 70 str = "L1-D Data RAM"; 71 break; 72 case TLB_RAM: 73 str = "TLB RAM"; 74 break; 75 default: 76 str = "Unspecified"; 77 break; 78 } 79 80 snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d", 81 str, fatal ? "fatal" : "correctable", cpu); 82 83 if (fatal) 84 edac_device_handle_ue(edac_ctl, cpu, 0, msg); 85 else 86 edac_device_handle_ce(edac_ctl, cpu, 0, msg); 87 } 88 89 if (l2_mesr & L2MERRSR_EL1_VALID) { 90 bool fatal = l2_mesr & L2MERRSR_EL1_FATAL; 91 92 snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx", 93 fatal ? "fatal" : "correctable", cpu, 94 FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr)); 95 if (fatal) 96 edac_device_handle_ue(edac_ctl, cpu, 1, msg); 97 else 98 edac_device_handle_ce(edac_ctl, cpu, 1, msg); 99 } 100} 101 102static void read_errors(void *data) 103{ 104 struct mem_err_synd_reg *mesr = data; 105 106 mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1); 107 if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) { 108 write_sysreg_s(0, SYS_CPUMERRSR_EL1); 109 isb(); 110 } 111 mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1); 112 if (mesr->l2_mesr & L2MERRSR_EL1_VALID) { 113 write_sysreg_s(0, SYS_L2MERRSR_EL1); 114 isb(); 115 } 116} 117 118static void a72_edac_check(struct edac_device_ctl_info *edac_ctl) 119{ 120 struct mem_err_synd_reg mesr; 121 int cpu; 122 123 cpus_read_lock(); 124 for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) { 125 smp_call_function_single(cpu, read_errors, &mesr, true); 126 report_errors(edac_ctl, cpu, &mesr); 127 } 128 cpus_read_unlock(); 129} 130 131static int a72_edac_probe(struct platform_device *pdev) 132{ 133 struct edac_device_ctl_info *edac_ctl; 134 struct device *dev = &pdev->dev; 135 int rc; 136 137 edac_ctl = edac_device_alloc_ctl_info(0, "cpu", 138 num_possible_cpus(), "L", 2, 1, 139 edac_device_alloc_index()); 140 if (!edac_ctl) 141 return -ENOMEM; 142 143 edac_ctl->edac_check = a72_edac_check; 144 edac_ctl->dev = dev; 145 edac_ctl->mod_name = dev_name(dev); 146 edac_ctl->dev_name = dev_name(dev); 147 edac_ctl->ctl_name = DRVNAME; 148 dev_set_drvdata(dev, edac_ctl); 149 150 rc = edac_device_add_device(edac_ctl); 151 if (rc) 152 goto out_dev; 153 154 return 0; 155 156out_dev: 157 edac_device_free_ctl_info(edac_ctl); 158 159 return rc; 160} 161 162static void a72_edac_remove(struct platform_device *pdev) 163{ 164 struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev); 165 166 edac_device_del_device(edac_ctl->dev); 167 edac_device_free_ctl_info(edac_ctl); 168} 169 170static const struct of_device_id cortex_arm64_edac_of_match[] = { 171 { .compatible = "arm,cortex-a72" }, 172 {} 173}; 174MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match); 175 176static struct platform_driver a72_edac_driver = { 177 .probe = a72_edac_probe, 178 .remove = a72_edac_remove, 179 .driver = { 180 .name = DRVNAME, 181 }, 182}; 183 184static struct platform_device *a72_pdev; 185 186static int __init a72_edac_driver_init(void) 187{ 188 int cpu; 189 190 for_each_possible_cpu(cpu) { 191 struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu); 192 if (np) { 193 if (of_match_node(cortex_arm64_edac_of_match, np) && 194 of_property_read_bool(np, "edac-enabled")) { 195 cpumask_set_cpu(cpu, &compat_mask); 196 } 197 } else { 198 pr_warn("failed to find device node for CPU %d\n", cpu); 199 } 200 } 201 202 if (cpumask_empty(&compat_mask)) 203 return 0; 204 205 a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0); 206 if (IS_ERR(a72_pdev)) { 207 pr_err("failed to register A72 EDAC device\n"); 208 return PTR_ERR(a72_pdev); 209 } 210 211 return platform_driver_register(&a72_edac_driver); 212} 213 214static void __exit a72_edac_driver_exit(void) 215{ 216 platform_device_unregister(a72_pdev); 217 platform_driver_unregister(&a72_edac_driver); 218} 219 220module_init(a72_edac_driver_init); 221module_exit(a72_edac_driver_exit); 222 223MODULE_LICENSE("GPL"); 224MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>"); 225MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver");