Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Cortex A72 EDAC L1 and L2 cache error detection
4 *
5 * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
6 * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com>
7 *
8 * Based on Code from:
9 * Copyright (c) 2018, NXP Semiconductor
10 * Author: York Sun <york.sun@nxp.com>
11 */
12
13#include <linux/module.h>
14#include <linux/of.h>
15#include <linux/bitfield.h>
16#include <asm/smp_plat.h>
17
18#include "edac_module.h"
19
#define DRVNAME				"a72-edac"

/* System register encodings of the two error syndrome registers polled here. */
#define SYS_CPUMERRSR_EL1		sys_reg(3, 1, 15, 2, 2)
#define SYS_L2MERRSR_EL1		sys_reg(3, 1, 15, 2, 3)

/* Syndrome fields that end up in the report message. */
#define CPUMERRSR_EL1_RAMID		GENMASK(30, 24)
#define L2MERRSR_EL1_CPUID_WAY		GENMASK(21, 18)

/*
 * VALID: the register holds a recorded error and must be reported/cleared.
 * FATAL: the recorded error is reported as uncorrectable instead of
 *        correctable.
 */
#define CPUMERRSR_EL1_VALID		BIT(31)
#define CPUMERRSR_EL1_FATAL		BIT(63)
#define L2MERRSR_EL1_VALID		BIT(31)
#define L2MERRSR_EL1_FATAL		BIT(63)

/* CPUMERRSR_EL1_RAMID values that report_errors() knows how to name. */
#define L1_I_TAG_RAM			0x00
#define L1_I_DATA_RAM			0x01
#define L1_D_TAG_RAM			0x08
#define L1_D_DATA_RAM			0x09
#define TLB_RAM				0x18

/* Size of the on-stack buffer one error message is formatted into. */
#define MESSAGE_SIZE			64
40
41struct mem_err_synd_reg {
42 u64 cpu_mesr;
43 u64 l2_mesr;
44};
45
46static struct cpumask compat_mask;
47
48static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu,
49 struct mem_err_synd_reg *mesr)
50{
51 u64 cpu_mesr = mesr->cpu_mesr;
52 u64 l2_mesr = mesr->l2_mesr;
53 char msg[MESSAGE_SIZE];
54
55 if (cpu_mesr & CPUMERRSR_EL1_VALID) {
56 const char *str;
57 bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL;
58
59 switch (FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) {
60 case L1_I_TAG_RAM:
61 str = "L1-I Tag RAM";
62 break;
63 case L1_I_DATA_RAM:
64 str = "L1-I Data RAM";
65 break;
66 case L1_D_TAG_RAM:
67 str = "L1-D Tag RAM";
68 break;
69 case L1_D_DATA_RAM:
70 str = "L1-D Data RAM";
71 break;
72 case TLB_RAM:
73 str = "TLB RAM";
74 break;
75 default:
76 str = "Unspecified";
77 break;
78 }
79
80 snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d",
81 str, fatal ? "fatal" : "correctable", cpu);
82
83 if (fatal)
84 edac_device_handle_ue(edac_ctl, cpu, 0, msg);
85 else
86 edac_device_handle_ce(edac_ctl, cpu, 0, msg);
87 }
88
89 if (l2_mesr & L2MERRSR_EL1_VALID) {
90 bool fatal = l2_mesr & L2MERRSR_EL1_FATAL;
91
92 snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx",
93 fatal ? "fatal" : "correctable", cpu,
94 FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr));
95 if (fatal)
96 edac_device_handle_ue(edac_ctl, cpu, 1, msg);
97 else
98 edac_device_handle_ce(edac_ctl, cpu, 1, msg);
99 }
100}
101
102static void read_errors(void *data)
103{
104 struct mem_err_synd_reg *mesr = data;
105
106 mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1);
107 if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) {
108 write_sysreg_s(0, SYS_CPUMERRSR_EL1);
109 isb();
110 }
111 mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1);
112 if (mesr->l2_mesr & L2MERRSR_EL1_VALID) {
113 write_sysreg_s(0, SYS_L2MERRSR_EL1);
114 isb();
115 }
116}
117
118static void a72_edac_check(struct edac_device_ctl_info *edac_ctl)
119{
120 struct mem_err_synd_reg mesr;
121 int cpu;
122
123 cpus_read_lock();
124 for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) {
125 smp_call_function_single(cpu, read_errors, &mesr, true);
126 report_errors(edac_ctl, cpu, &mesr);
127 }
128 cpus_read_unlock();
129}
130
131static int a72_edac_probe(struct platform_device *pdev)
132{
133 struct edac_device_ctl_info *edac_ctl;
134 struct device *dev = &pdev->dev;
135 int rc;
136
137 edac_ctl = edac_device_alloc_ctl_info(0, "cpu",
138 num_possible_cpus(), "L", 2, 1,
139 edac_device_alloc_index());
140 if (!edac_ctl)
141 return -ENOMEM;
142
143 edac_ctl->edac_check = a72_edac_check;
144 edac_ctl->dev = dev;
145 edac_ctl->mod_name = dev_name(dev);
146 edac_ctl->dev_name = dev_name(dev);
147 edac_ctl->ctl_name = DRVNAME;
148 dev_set_drvdata(dev, edac_ctl);
149
150 rc = edac_device_add_device(edac_ctl);
151 if (rc)
152 goto out_dev;
153
154 return 0;
155
156out_dev:
157 edac_device_free_ctl_info(edac_ctl);
158
159 return rc;
160}
161
162static void a72_edac_remove(struct platform_device *pdev)
163{
164 struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev);
165
166 edac_device_del_device(edac_ctl->dev);
167 edac_device_free_ctl_info(edac_ctl);
168}
169
170static const struct of_device_id cortex_arm64_edac_of_match[] = {
171 { .compatible = "arm,cortex-a72" },
172 {}
173};
174MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match);
175
176static struct platform_driver a72_edac_driver = {
177 .probe = a72_edac_probe,
178 .remove = a72_edac_remove,
179 .driver = {
180 .name = DRVNAME,
181 },
182};
183
/* Platform device created at module init; unregistered again at module exit. */
static struct platform_device *a72_pdev;
185
186static int __init a72_edac_driver_init(void)
187{
188 int cpu;
189
190 for_each_possible_cpu(cpu) {
191 struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
192 if (np) {
193 if (of_match_node(cortex_arm64_edac_of_match, np) &&
194 of_property_read_bool(np, "edac-enabled")) {
195 cpumask_set_cpu(cpu, &compat_mask);
196 }
197 } else {
198 pr_warn("failed to find device node for CPU %d\n", cpu);
199 }
200 }
201
202 if (cpumask_empty(&compat_mask))
203 return 0;
204
205 a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0);
206 if (IS_ERR(a72_pdev)) {
207 pr_err("failed to register A72 EDAC device\n");
208 return PTR_ERR(a72_pdev);
209 }
210
211 return platform_driver_register(&a72_edac_driver);
212}
213
214static void __exit a72_edac_driver_exit(void)
215{
216 platform_device_unregister(a72_pdev);
217 platform_driver_unregister(&a72_edac_driver);
218}
219
220module_init(a72_edac_driver_init);
221module_exit(a72_edac_driver_exit);
222
223MODULE_LICENSE("GPL");
224MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
225MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver");