Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * edac_mc kernel module
3 * (C) 2005-2007 Linux Networx (http://lnxi.com)
4 *
5 * This file may be distributed under the terms of the
6 * GNU General Public License.
7 *
 * Written by Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com
9 *
10 * (c) 2012-2013 - Mauro Carvalho Chehab
 * The entire API was rewritten and ported to use struct device
12 *
13 */
14
15#include <linux/ctype.h>
16#include <linux/slab.h>
17#include <linux/edac.h>
18#include <linux/bug.h>
19#include <linux/pm_runtime.h>
20#include <linux/uaccess.h>
21
22#include "edac_mc.h"
23#include "edac_module.h"
24
/*
 * MC EDAC controls. Each of them can be set as a module parameter and
 * through sysfs.
 */
static int edac_mc_log_ue = 1;
static int edac_mc_log_ce = 1;
static int edac_mc_panic_on_ue;
static unsigned int edac_mc_poll_msec = 1000;

/*
 * Accessors that hand the current value of each control to other
 * compilation units.
 */

/* Current value of the edac_mc_log_ue control */
int edac_mc_get_log_ue(void)
{
	return edac_mc_log_ue;
}

/* Current value of the edac_mc_log_ce control */
int edac_mc_get_log_ce(void)
{
	return edac_mc_log_ce;
}

/* Current value of the edac_mc_panic_on_ue control */
int edac_mc_get_panic_on_ue(void)
{
	return edac_mc_panic_on_ue;
}

/*
 * Current polling period in milliseconds.
 * This accessor is marked as temporary.
 */
unsigned int edac_mc_get_poll_msec(void)
{
	return edac_mc_poll_msec;
}
52
53static int edac_set_poll_msec(const char *val, const struct kernel_param *kp)
54{
55 unsigned int i;
56 int ret;
57
58 if (!val)
59 return -EINVAL;
60
61 ret = kstrtouint(val, 0, &i);
62 if (ret)
63 return ret;
64
65 if (i < 1000)
66 return -EINVAL;
67
68 *((unsigned int *)kp->arg) = i;
69
70 /* notify edac_mc engine to reset the poll period */
71 edac_mc_reset_delay_period(i);
72
73 return 0;
74}
75
76/* Parameter declarations for above */
77module_param(edac_mc_panic_on_ue, int, 0644);
78MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
79module_param(edac_mc_log_ue, int, 0644);
80MODULE_PARM_DESC(edac_mc_log_ue,
81 "Log uncorrectable error to console: 0=off 1=on");
82module_param(edac_mc_log_ce, int, 0644);
83MODULE_PARM_DESC(edac_mc_log_ce,
84 "Log correctable error to console: 0=off 1=on");
85module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_uint,
86 &edac_mc_poll_msec, 0644);
87MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds");
88
/*
 * Parent device of every mc<id> device. It is allocated and registered in
 * edac_mc_sysfs_init() and unregistered in edac_mc_sysfs_exit().
 */
static struct device *mci_pdev;
90
91/*
92 * various constants for Memory Controllers
93 */
94static const char * const dev_types[] = {
95 [DEV_UNKNOWN] = "Unknown",
96 [DEV_X1] = "x1",
97 [DEV_X2] = "x2",
98 [DEV_X4] = "x4",
99 [DEV_X8] = "x8",
100 [DEV_X16] = "x16",
101 [DEV_X32] = "x32",
102 [DEV_X64] = "x64"
103};
104
105static const char * const edac_caps[] = {
106 [EDAC_UNKNOWN] = "Unknown",
107 [EDAC_NONE] = "None",
108 [EDAC_RESERVED] = "Reserved",
109 [EDAC_PARITY] = "PARITY",
110 [EDAC_EC] = "EC",
111 [EDAC_SECDED] = "SECDED",
112 [EDAC_S2ECD2ED] = "S2ECD2ED",
113 [EDAC_S4ECD4ED] = "S4ECD4ED",
114 [EDAC_S8ECD8ED] = "S8ECD8ED",
115 [EDAC_S16ECD16ED] = "S16ECD16ED"
116};
117
118/*
119 * Per-dimm (or per-rank) devices
120 */
121
/* Map a pointer to the struct device embedded in a dimm_info back to it */
#define to_dimm(k)	container_of((k), struct dimm_info, dev)
123
124/* show/store functions for DIMM Label attributes */
125static ssize_t dimmdev_location_show(struct device *dev,
126 struct device_attribute *mattr, char *data)
127{
128 struct dimm_info *dimm = to_dimm(dev);
129 ssize_t count;
130
131 count = edac_dimm_info_location(dimm, data, PAGE_SIZE);
132 count += scnprintf(data + count, PAGE_SIZE - count, "\n");
133
134 return count;
135}
136
137static ssize_t dimmdev_label_show(struct device *dev,
138 struct device_attribute *mattr, char *data)
139{
140 struct dimm_info *dimm = to_dimm(dev);
141
142 /* if field has not been initialized, there is nothing to send */
143 if (!dimm->label[0])
144 return 0;
145
146 return sysfs_emit(data, "%s\n", dimm->label);
147}
148
149static ssize_t dimmdev_label_store(struct device *dev,
150 struct device_attribute *mattr,
151 const char *data,
152 size_t count)
153{
154 struct dimm_info *dimm = to_dimm(dev);
155 size_t copy_count = count;
156
157 if (count == 0)
158 return -EINVAL;
159
160 if (data[count - 1] == '\0' || data[count - 1] == '\n')
161 copy_count -= 1;
162
163 if (copy_count == 0 || copy_count >= sizeof(dimm->label))
164 return -EINVAL;
165
166 memcpy(dimm->label, data, copy_count);
167 dimm->label[copy_count] = '\0';
168
169 return count;
170}
171
172static ssize_t dimmdev_size_show(struct device *dev,
173 struct device_attribute *mattr, char *data)
174{
175 struct dimm_info *dimm = to_dimm(dev);
176
177 return sysfs_emit(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages));
178}
179
180static ssize_t dimmdev_mem_type_show(struct device *dev,
181 struct device_attribute *mattr, char *data)
182{
183 struct dimm_info *dimm = to_dimm(dev);
184
185 return sysfs_emit(data, "%s\n", edac_mem_types[dimm->mtype]);
186}
187
188static ssize_t dimmdev_dev_type_show(struct device *dev,
189 struct device_attribute *mattr, char *data)
190{
191 struct dimm_info *dimm = to_dimm(dev);
192
193 return sysfs_emit(data, "%s\n", dev_types[dimm->dtype]);
194}
195
196static ssize_t dimmdev_edac_mode_show(struct device *dev,
197 struct device_attribute *mattr,
198 char *data)
199{
200 struct dimm_info *dimm = to_dimm(dev);
201
202 return sysfs_emit(data, "%s\n", edac_caps[dimm->edac_mode]);
203}
204
205static ssize_t dimmdev_ce_count_show(struct device *dev,
206 struct device_attribute *mattr,
207 char *data)
208{
209 struct dimm_info *dimm = to_dimm(dev);
210
211 return sysfs_emit(data, "%u\n", dimm->ce_count);
212}
213
214static ssize_t dimmdev_ue_count_show(struct device *dev,
215 struct device_attribute *mattr,
216 char *data)
217{
218 struct dimm_info *dimm = to_dimm(dev);
219
220 return sysfs_emit(data, "%u\n", dimm->ue_count);
221}
222
223/* dimm/rank attribute files */
224static DEVICE_ATTR(dimm_label, S_IRUGO | S_IWUSR,
225 dimmdev_label_show, dimmdev_label_store);
226static DEVICE_ATTR(dimm_location, S_IRUGO, dimmdev_location_show, NULL);
227static DEVICE_ATTR(size, S_IRUGO, dimmdev_size_show, NULL);
228static DEVICE_ATTR(dimm_mem_type, S_IRUGO, dimmdev_mem_type_show, NULL);
229static DEVICE_ATTR(dimm_dev_type, S_IRUGO, dimmdev_dev_type_show, NULL);
230static DEVICE_ATTR(dimm_edac_mode, S_IRUGO, dimmdev_edac_mode_show, NULL);
231static DEVICE_ATTR(dimm_ce_count, S_IRUGO, dimmdev_ce_count_show, NULL);
232static DEVICE_ATTR(dimm_ue_count, S_IRUGO, dimmdev_ue_count_show, NULL);
233
234/* attributes of the dimm<id>/rank<id> object */
235static struct attribute *dimm_attrs[] = {
236 &dev_attr_dimm_label.attr,
237 &dev_attr_dimm_location.attr,
238 &dev_attr_size.attr,
239 &dev_attr_dimm_mem_type.attr,
240 &dev_attr_dimm_dev_type.attr,
241 &dev_attr_dimm_edac_mode.attr,
242 &dev_attr_dimm_ce_count.attr,
243 &dev_attr_dimm_ue_count.attr,
244 NULL,
245};
246
247static const struct attribute_group dimm_attr_grp = {
248 .attrs = dimm_attrs,
249};
250
251static const struct attribute_group *dimm_attr_groups[] = {
252 &dimm_attr_grp,
253 NULL
254};
255
256static const struct device_type dimm_attr_type = {
257 .groups = dimm_attr_groups,
258};
259
/*
 * Release callback installed on dimm/rank devices.
 *
 * Deliberately empty: the mci device owns the dimm data and releases it
 * as well, so only the sysfs side is torn down for a dimm device.
 */
static void dimm_release(struct device *dev)
{
}
267
268/* Create a DIMM object under specified memory controller device */
269static int edac_create_dimm_object(struct mem_ctl_info *mci,
270 struct dimm_info *dimm)
271{
272 int err;
273 dimm->mci = mci;
274
275 dimm->dev.type = &dimm_attr_type;
276 dimm->dev.release = dimm_release;
277 device_initialize(&dimm->dev);
278
279 dimm->dev.parent = &mci->dev;
280 if (mci->csbased)
281 dev_set_name(&dimm->dev, "rank%d", dimm->idx);
282 else
283 dev_set_name(&dimm->dev, "dimm%d", dimm->idx);
284 dev_set_drvdata(&dimm->dev, dimm);
285 pm_runtime_forbid(&mci->dev);
286
287 err = device_add(&dimm->dev);
288 if (err) {
289 edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev));
290 put_device(&dimm->dev);
291 return err;
292 }
293
294 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
295 char location[80];
296
297 edac_dimm_info_location(dimm, location, sizeof(location));
298 edac_dbg(0, "device %s created at location %s\n",
299 dev_name(&dimm->dev), location);
300 }
301
302 return 0;
303}
304
305/*
306 * Memory controller device
307 */
308
/* Map a pointer to the struct device embedded in a mem_ctl_info back to it */
#define to_mci(k)	container_of((k), struct mem_ctl_info, dev)
310
311static ssize_t mci_reset_counters_store(struct device *dev,
312 struct device_attribute *mattr,
313 const char *data, size_t count)
314{
315 struct mem_ctl_info *mci = to_mci(dev);
316 struct dimm_info *dimm;
317 int row, chan;
318
319 mci->ue_mc = 0;
320 mci->ce_mc = 0;
321 mci->ue_noinfo_count = 0;
322 mci->ce_noinfo_count = 0;
323
324 for (row = 0; row < mci->nr_csrows; row++) {
325 struct csrow_info *ri = mci->csrows[row];
326
327 ri->ue_count = 0;
328 ri->ce_count = 0;
329
330 for (chan = 0; chan < ri->nr_channels; chan++)
331 ri->channels[chan]->ce_count = 0;
332 }
333
334 mci_for_each_dimm(mci, dimm) {
335 dimm->ue_count = 0;
336 dimm->ce_count = 0;
337 }
338
339 mci->start_time = jiffies;
340 return count;
341}
342
343/* Memory scrubbing interface:
344 *
345 * A MC driver can limit the scrubbing bandwidth based on the CPU type.
346 * Therefore, ->set_sdram_scrub_rate should be made to return the actual
347 * bandwidth that is accepted or 0 when scrubbing is to be disabled.
348 *
349 * Negative value still means that an error has occurred while setting
350 * the scrub rate.
351 */
352static ssize_t mci_sdram_scrub_rate_store(struct device *dev,
353 struct device_attribute *mattr,
354 const char *data, size_t count)
355{
356 struct mem_ctl_info *mci = to_mci(dev);
357 unsigned long bandwidth = 0;
358 int new_bw = 0;
359
360 if (kstrtoul(data, 10, &bandwidth) < 0)
361 return -EINVAL;
362
363 new_bw = mci->set_sdram_scrub_rate(mci, bandwidth);
364 if (new_bw < 0) {
365 edac_printk(KERN_WARNING, EDAC_MC,
366 "Error setting scrub rate to: %lu\n", bandwidth);
367 return -EINVAL;
368 }
369
370 return count;
371}
372
373/*
374 * ->get_sdram_scrub_rate() return value semantics same as above.
375 */
376static ssize_t mci_sdram_scrub_rate_show(struct device *dev,
377 struct device_attribute *mattr,
378 char *data)
379{
380 struct mem_ctl_info *mci = to_mci(dev);
381 int bandwidth = 0;
382
383 bandwidth = mci->get_sdram_scrub_rate(mci);
384 if (bandwidth < 0) {
385 edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n");
386 return bandwidth;
387 }
388
389 return sysfs_emit(data, "%d\n", bandwidth);
390}
391
392/* default attribute files for the MCI object */
393static ssize_t mci_ue_count_show(struct device *dev,
394 struct device_attribute *mattr,
395 char *data)
396{
397 struct mem_ctl_info *mci = to_mci(dev);
398
399 return sysfs_emit(data, "%u\n", mci->ue_mc);
400}
401
402static ssize_t mci_ce_count_show(struct device *dev,
403 struct device_attribute *mattr,
404 char *data)
405{
406 struct mem_ctl_info *mci = to_mci(dev);
407
408 return sysfs_emit(data, "%u\n", mci->ce_mc);
409}
410
411static ssize_t mci_ce_noinfo_show(struct device *dev,
412 struct device_attribute *mattr,
413 char *data)
414{
415 struct mem_ctl_info *mci = to_mci(dev);
416
417 return sysfs_emit(data, "%u\n", mci->ce_noinfo_count);
418}
419
420static ssize_t mci_ue_noinfo_show(struct device *dev,
421 struct device_attribute *mattr,
422 char *data)
423{
424 struct mem_ctl_info *mci = to_mci(dev);
425
426 return sysfs_emit(data, "%u\n", mci->ue_noinfo_count);
427}
428
429static ssize_t mci_seconds_show(struct device *dev,
430 struct device_attribute *mattr,
431 char *data)
432{
433 struct mem_ctl_info *mci = to_mci(dev);
434
435 return sysfs_emit(data, "%ld\n", (jiffies - mci->start_time) / HZ);
436}
437
438static ssize_t mci_ctl_name_show(struct device *dev,
439 struct device_attribute *mattr,
440 char *data)
441{
442 struct mem_ctl_info *mci = to_mci(dev);
443
444 return sysfs_emit(data, "%s\n", mci->ctl_name);
445}
446
447static ssize_t mci_size_mb_show(struct device *dev,
448 struct device_attribute *mattr,
449 char *data)
450{
451 struct mem_ctl_info *mci = to_mci(dev);
452 int total_pages = 0, csrow_idx, j;
453
454 for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) {
455 struct csrow_info *csrow = mci->csrows[csrow_idx];
456
457 for (j = 0; j < csrow->nr_channels; j++) {
458 struct dimm_info *dimm = csrow->channels[j]->dimm;
459
460 total_pages += dimm->nr_pages;
461 }
462 }
463
464 return sysfs_emit(data, "%u\n", PAGES_TO_MiB(total_pages));
465}
466
467static ssize_t mci_max_location_show(struct device *dev,
468 struct device_attribute *mattr,
469 char *data)
470{
471 struct mem_ctl_info *mci = to_mci(dev);
472 int len = PAGE_SIZE;
473 char *p = data;
474 int i, n;
475
476 for (i = 0; i < mci->n_layers; i++) {
477 n = scnprintf(p, len, "%s %d ",
478 edac_layer_name[mci->layers[i].type],
479 mci->layers[i].size - 1);
480 len -= n;
481 if (len <= 0)
482 goto out;
483
484 p += n;
485 }
486
487 p += scnprintf(p, len, "\n");
488out:
489 return p - data;
490}
491
492/* default Control file */
493static DEVICE_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store);
494
495/* default Attribute files */
496static DEVICE_ATTR(mc_name, S_IRUGO, mci_ctl_name_show, NULL);
497static DEVICE_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL);
498static DEVICE_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL);
499static DEVICE_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL);
500static DEVICE_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL);
501static DEVICE_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL);
502static DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL);
503static DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL);
504
505/* memory scrubber attribute file */
506static DEVICE_ATTR(sdram_scrub_rate, 0, mci_sdram_scrub_rate_show,
507 mci_sdram_scrub_rate_store); /* umode set later in is_visible */
508
509static struct attribute *mci_attrs[] = {
510 &dev_attr_reset_counters.attr,
511 &dev_attr_mc_name.attr,
512 &dev_attr_size_mb.attr,
513 &dev_attr_seconds_since_reset.attr,
514 &dev_attr_ue_noinfo_count.attr,
515 &dev_attr_ce_noinfo_count.attr,
516 &dev_attr_ue_count.attr,
517 &dev_attr_ce_count.attr,
518 &dev_attr_max_location.attr,
519 &dev_attr_sdram_scrub_rate.attr,
520 NULL
521};
522
523static umode_t mci_attr_is_visible(struct kobject *kobj,
524 struct attribute *attr, int idx)
525{
526 struct device *dev = kobj_to_dev(kobj);
527 struct mem_ctl_info *mci = to_mci(dev);
528 umode_t mode = 0;
529
530 if (attr != &dev_attr_sdram_scrub_rate.attr)
531 return attr->mode;
532 if (mci->get_sdram_scrub_rate)
533 mode |= S_IRUGO;
534 if (mci->set_sdram_scrub_rate)
535 mode |= S_IWUSR;
536 return mode;
537}
538
539static const struct attribute_group mci_attr_grp = {
540 .attrs = mci_attrs,
541 .is_visible = mci_attr_is_visible,
542};
543
544static const struct attribute_group *mci_attr_groups[] = {
545 &mci_attr_grp,
546 NULL
547};
548
549static const struct device_type mci_attr_type = {
550 .groups = mci_attr_groups,
551};
552
553/*
554 * Create a new Memory Controller kobject instance,
555 * mc<id> under the 'mc' directory
556 *
557 * Return:
558 * 0 Success
559 * !0 Failure
560 */
561int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
562 const struct attribute_group **groups)
563{
564 struct dimm_info *dimm;
565 int err;
566
567 /* get the /sys/devices/system/edac subsys reference */
568 mci->dev.type = &mci_attr_type;
569 mci->dev.parent = mci_pdev;
570 mci->dev.groups = groups;
571 dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
572 dev_set_drvdata(&mci->dev, mci);
573 pm_runtime_forbid(&mci->dev);
574
575 err = device_add(&mci->dev);
576 if (err < 0) {
577 edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
578 /* no put_device() here, free mci with _edac_mc_free() */
579 return err;
580 }
581
582 edac_dbg(0, "device %s created\n", dev_name(&mci->dev));
583
584 /*
585 * Create the dimm/rank devices
586 */
587 mci_for_each_dimm(mci, dimm) {
588 /* Only expose populated DIMMs */
589 if (!dimm->nr_pages)
590 continue;
591
592 err = edac_create_dimm_object(mci, dimm);
593 if (err)
594 goto fail;
595 }
596
597 edac_create_debugfs_nodes(mci);
598 return 0;
599
600fail:
601 edac_remove_sysfs_mci_device(mci);
602
603 return err;
604}
605
606/*
607 * remove a Memory Controller instance
608 */
609void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
610{
611 struct dimm_info *dimm;
612
613 if (!device_is_registered(&mci->dev))
614 return;
615
616 edac_dbg(0, "\n");
617
618#ifdef CONFIG_EDAC_DEBUG
619 edac_debugfs_remove_recursive(mci->debugfs);
620#endif
621
622 mci_for_each_dimm(mci, dimm) {
623 if (!device_is_registered(&dimm->dev))
624 continue;
625 edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev));
626 device_unregister(&dimm->dev);
627 }
628
629 /* only remove the device, but keep mci */
630 device_del(&mci->dev);
631}
632
/*
 * Release callback of the mci parent device.
 *
 * That device has no container structure and no attributes: it exists
 * only to create the /sys/devices/mc sysfs node. Releasing it therefore
 * just frees the struct device itself.
 */
static void mc_attr_release(struct device *dev)
{
	edac_dbg(1, "device %s released\n", dev_name(dev));
	kfree(dev);
}
643
644/*
645 * Init/exit code for the module. Basically, creates/removes /sys/class/rc
646 */
647int __init edac_mc_sysfs_init(void)
648{
649 int err;
650
651 mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL);
652 if (!mci_pdev)
653 return -ENOMEM;
654
655 mci_pdev->bus = edac_get_sysfs_subsys();
656 mci_pdev->release = mc_attr_release;
657 mci_pdev->init_name = "mc";
658
659 err = device_register(mci_pdev);
660 if (err < 0) {
661 edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev));
662 put_device(mci_pdev);
663 return err;
664 }
665
666 edac_dbg(0, "device %s created\n", dev_name(mci_pdev));
667
668 return 0;
669}
670
671void edac_mc_sysfs_exit(void)
672{
673 device_unregister(mci_pdev);
674}