/* at master 16 kB view raw */
1/* 2 * edac_mc kernel module 3 * (C) 2005-2007 Linux Networx (http://lnxi.com) 4 * 5 * This file may be distributed under the terms of the 6 * GNU General Public License. 7 * 8 * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com 9 * 10 * (c) 2012-2013 - Mauro Carvalho Chehab 11 * The entire API were re-written, and ported to use struct device 12 * 13 */ 14 15#include <linux/ctype.h> 16#include <linux/slab.h> 17#include <linux/edac.h> 18#include <linux/bug.h> 19#include <linux/pm_runtime.h> 20#include <linux/uaccess.h> 21 22#include "edac_mc.h" 23#include "edac_module.h" 24 25/* MC EDAC Controls, setable by module parameter, and sysfs */ 26static int edac_mc_log_ue = 1; 27static int edac_mc_log_ce = 1; 28static int edac_mc_panic_on_ue; 29static unsigned int edac_mc_poll_msec = 1000; 30 31/* Getter functions for above */ 32int edac_mc_get_log_ue(void) 33{ 34 return edac_mc_log_ue; 35} 36 37int edac_mc_get_log_ce(void) 38{ 39 return edac_mc_log_ce; 40} 41 42int edac_mc_get_panic_on_ue(void) 43{ 44 return edac_mc_panic_on_ue; 45} 46 47/* this is temporary */ 48unsigned int edac_mc_get_poll_msec(void) 49{ 50 return edac_mc_poll_msec; 51} 52 53static int edac_set_poll_msec(const char *val, const struct kernel_param *kp) 54{ 55 unsigned int i; 56 int ret; 57 58 if (!val) 59 return -EINVAL; 60 61 ret = kstrtouint(val, 0, &i); 62 if (ret) 63 return ret; 64 65 if (i < 1000) 66 return -EINVAL; 67 68 *((unsigned int *)kp->arg) = i; 69 70 /* notify edac_mc engine to reset the poll period */ 71 edac_mc_reset_delay_period(i); 72 73 return 0; 74} 75 76/* Parameter declarations for above */ 77module_param(edac_mc_panic_on_ue, int, 0644); 78MODULE_PARM_DESC(edac_mc_panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); 79module_param(edac_mc_log_ue, int, 0644); 80MODULE_PARM_DESC(edac_mc_log_ue, 81 "Log uncorrectable error to console: 0=off 1=on"); 82module_param(edac_mc_log_ce, int, 0644); 83MODULE_PARM_DESC(edac_mc_log_ce, 84 "Log correctable error to console: 
0=off 1=on"); 85module_param_call(edac_mc_poll_msec, edac_set_poll_msec, param_get_uint, 86 &edac_mc_poll_msec, 0644); 87MODULE_PARM_DESC(edac_mc_poll_msec, "Polling period in milliseconds"); 88 89static struct device *mci_pdev; 90 91/* 92 * various constants for Memory Controllers 93 */ 94static const char * const dev_types[] = { 95 [DEV_UNKNOWN] = "Unknown", 96 [DEV_X1] = "x1", 97 [DEV_X2] = "x2", 98 [DEV_X4] = "x4", 99 [DEV_X8] = "x8", 100 [DEV_X16] = "x16", 101 [DEV_X32] = "x32", 102 [DEV_X64] = "x64" 103}; 104 105static const char * const edac_caps[] = { 106 [EDAC_UNKNOWN] = "Unknown", 107 [EDAC_NONE] = "None", 108 [EDAC_RESERVED] = "Reserved", 109 [EDAC_PARITY] = "PARITY", 110 [EDAC_EC] = "EC", 111 [EDAC_SECDED] = "SECDED", 112 [EDAC_S2ECD2ED] = "S2ECD2ED", 113 [EDAC_S4ECD4ED] = "S4ECD4ED", 114 [EDAC_S8ECD8ED] = "S8ECD8ED", 115 [EDAC_S16ECD16ED] = "S16ECD16ED" 116}; 117 118/* 119 * Per-dimm (or per-rank) devices 120 */ 121 122#define to_dimm(k) container_of(k, struct dimm_info, dev) 123 124/* show/store functions for DIMM Label attributes */ 125static ssize_t dimmdev_location_show(struct device *dev, 126 struct device_attribute *mattr, char *data) 127{ 128 struct dimm_info *dimm = to_dimm(dev); 129 ssize_t count; 130 131 count = edac_dimm_info_location(dimm, data, PAGE_SIZE); 132 count += scnprintf(data + count, PAGE_SIZE - count, "\n"); 133 134 return count; 135} 136 137static ssize_t dimmdev_label_show(struct device *dev, 138 struct device_attribute *mattr, char *data) 139{ 140 struct dimm_info *dimm = to_dimm(dev); 141 142 /* if field has not been initialized, there is nothing to send */ 143 if (!dimm->label[0]) 144 return 0; 145 146 return sysfs_emit(data, "%s\n", dimm->label); 147} 148 149static ssize_t dimmdev_label_store(struct device *dev, 150 struct device_attribute *mattr, 151 const char *data, 152 size_t count) 153{ 154 struct dimm_info *dimm = to_dimm(dev); 155 size_t copy_count = count; 156 157 if (count == 0) 158 return -EINVAL; 159 160 if 
(data[count - 1] == '\0' || data[count - 1] == '\n') 161 copy_count -= 1; 162 163 if (copy_count == 0 || copy_count >= sizeof(dimm->label)) 164 return -EINVAL; 165 166 memcpy(dimm->label, data, copy_count); 167 dimm->label[copy_count] = '\0'; 168 169 return count; 170} 171 172static ssize_t dimmdev_size_show(struct device *dev, 173 struct device_attribute *mattr, char *data) 174{ 175 struct dimm_info *dimm = to_dimm(dev); 176 177 return sysfs_emit(data, "%u\n", PAGES_TO_MiB(dimm->nr_pages)); 178} 179 180static ssize_t dimmdev_mem_type_show(struct device *dev, 181 struct device_attribute *mattr, char *data) 182{ 183 struct dimm_info *dimm = to_dimm(dev); 184 185 return sysfs_emit(data, "%s\n", edac_mem_types[dimm->mtype]); 186} 187 188static ssize_t dimmdev_dev_type_show(struct device *dev, 189 struct device_attribute *mattr, char *data) 190{ 191 struct dimm_info *dimm = to_dimm(dev); 192 193 return sysfs_emit(data, "%s\n", dev_types[dimm->dtype]); 194} 195 196static ssize_t dimmdev_edac_mode_show(struct device *dev, 197 struct device_attribute *mattr, 198 char *data) 199{ 200 struct dimm_info *dimm = to_dimm(dev); 201 202 return sysfs_emit(data, "%s\n", edac_caps[dimm->edac_mode]); 203} 204 205static ssize_t dimmdev_ce_count_show(struct device *dev, 206 struct device_attribute *mattr, 207 char *data) 208{ 209 struct dimm_info *dimm = to_dimm(dev); 210 211 return sysfs_emit(data, "%u\n", dimm->ce_count); 212} 213 214static ssize_t dimmdev_ue_count_show(struct device *dev, 215 struct device_attribute *mattr, 216 char *data) 217{ 218 struct dimm_info *dimm = to_dimm(dev); 219 220 return sysfs_emit(data, "%u\n", dimm->ue_count); 221} 222 223/* dimm/rank attribute files */ 224static DEVICE_ATTR(dimm_label, S_IRUGO | S_IWUSR, 225 dimmdev_label_show, dimmdev_label_store); 226static DEVICE_ATTR(dimm_location, S_IRUGO, dimmdev_location_show, NULL); 227static DEVICE_ATTR(size, S_IRUGO, dimmdev_size_show, NULL); 228static DEVICE_ATTR(dimm_mem_type, S_IRUGO, 
dimmdev_mem_type_show, NULL); 229static DEVICE_ATTR(dimm_dev_type, S_IRUGO, dimmdev_dev_type_show, NULL); 230static DEVICE_ATTR(dimm_edac_mode, S_IRUGO, dimmdev_edac_mode_show, NULL); 231static DEVICE_ATTR(dimm_ce_count, S_IRUGO, dimmdev_ce_count_show, NULL); 232static DEVICE_ATTR(dimm_ue_count, S_IRUGO, dimmdev_ue_count_show, NULL); 233 234/* attributes of the dimm<id>/rank<id> object */ 235static struct attribute *dimm_attrs[] = { 236 &dev_attr_dimm_label.attr, 237 &dev_attr_dimm_location.attr, 238 &dev_attr_size.attr, 239 &dev_attr_dimm_mem_type.attr, 240 &dev_attr_dimm_dev_type.attr, 241 &dev_attr_dimm_edac_mode.attr, 242 &dev_attr_dimm_ce_count.attr, 243 &dev_attr_dimm_ue_count.attr, 244 NULL, 245}; 246 247static const struct attribute_group dimm_attr_grp = { 248 .attrs = dimm_attrs, 249}; 250 251static const struct attribute_group *dimm_attr_groups[] = { 252 &dimm_attr_grp, 253 NULL 254}; 255 256static const struct device_type dimm_attr_type = { 257 .groups = dimm_attr_groups, 258}; 259 260static void dimm_release(struct device *dev) 261{ 262 /* 263 * Nothing to do, just unregister sysfs here. The mci 264 * device owns the data and will also release it. 
265 */ 266} 267 268/* Create a DIMM object under specified memory controller device */ 269static int edac_create_dimm_object(struct mem_ctl_info *mci, 270 struct dimm_info *dimm) 271{ 272 int err; 273 dimm->mci = mci; 274 275 dimm->dev.type = &dimm_attr_type; 276 dimm->dev.release = dimm_release; 277 device_initialize(&dimm->dev); 278 279 dimm->dev.parent = &mci->dev; 280 if (mci->csbased) 281 dev_set_name(&dimm->dev, "rank%d", dimm->idx); 282 else 283 dev_set_name(&dimm->dev, "dimm%d", dimm->idx); 284 dev_set_drvdata(&dimm->dev, dimm); 285 pm_runtime_forbid(&mci->dev); 286 287 err = device_add(&dimm->dev); 288 if (err) { 289 edac_dbg(1, "failure: create device %s\n", dev_name(&dimm->dev)); 290 put_device(&dimm->dev); 291 return err; 292 } 293 294 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { 295 char location[80]; 296 297 edac_dimm_info_location(dimm, location, sizeof(location)); 298 edac_dbg(0, "device %s created at location %s\n", 299 dev_name(&dimm->dev), location); 300 } 301 302 return 0; 303} 304 305/* 306 * Memory controller device 307 */ 308 309#define to_mci(k) container_of(k, struct mem_ctl_info, dev) 310 311static ssize_t mci_reset_counters_store(struct device *dev, 312 struct device_attribute *mattr, 313 const char *data, size_t count) 314{ 315 struct mem_ctl_info *mci = to_mci(dev); 316 struct dimm_info *dimm; 317 int row, chan; 318 319 mci->ue_mc = 0; 320 mci->ce_mc = 0; 321 mci->ue_noinfo_count = 0; 322 mci->ce_noinfo_count = 0; 323 324 for (row = 0; row < mci->nr_csrows; row++) { 325 struct csrow_info *ri = mci->csrows[row]; 326 327 ri->ue_count = 0; 328 ri->ce_count = 0; 329 330 for (chan = 0; chan < ri->nr_channels; chan++) 331 ri->channels[chan]->ce_count = 0; 332 } 333 334 mci_for_each_dimm(mci, dimm) { 335 dimm->ue_count = 0; 336 dimm->ce_count = 0; 337 } 338 339 mci->start_time = jiffies; 340 return count; 341} 342 343/* Memory scrubbing interface: 344 * 345 * A MC driver can limit the scrubbing bandwidth based on the CPU type. 
346 * Therefore, ->set_sdram_scrub_rate should be made to return the actual 347 * bandwidth that is accepted or 0 when scrubbing is to be disabled. 348 * 349 * Negative value still means that an error has occurred while setting 350 * the scrub rate. 351 */ 352static ssize_t mci_sdram_scrub_rate_store(struct device *dev, 353 struct device_attribute *mattr, 354 const char *data, size_t count) 355{ 356 struct mem_ctl_info *mci = to_mci(dev); 357 unsigned long bandwidth = 0; 358 int new_bw = 0; 359 360 if (kstrtoul(data, 10, &bandwidth) < 0) 361 return -EINVAL; 362 363 new_bw = mci->set_sdram_scrub_rate(mci, bandwidth); 364 if (new_bw < 0) { 365 edac_printk(KERN_WARNING, EDAC_MC, 366 "Error setting scrub rate to: %lu\n", bandwidth); 367 return -EINVAL; 368 } 369 370 return count; 371} 372 373/* 374 * ->get_sdram_scrub_rate() return value semantics same as above. 375 */ 376static ssize_t mci_sdram_scrub_rate_show(struct device *dev, 377 struct device_attribute *mattr, 378 char *data) 379{ 380 struct mem_ctl_info *mci = to_mci(dev); 381 int bandwidth = 0; 382 383 bandwidth = mci->get_sdram_scrub_rate(mci); 384 if (bandwidth < 0) { 385 edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); 386 return bandwidth; 387 } 388 389 return sysfs_emit(data, "%d\n", bandwidth); 390} 391 392/* default attribute files for the MCI object */ 393static ssize_t mci_ue_count_show(struct device *dev, 394 struct device_attribute *mattr, 395 char *data) 396{ 397 struct mem_ctl_info *mci = to_mci(dev); 398 399 return sysfs_emit(data, "%u\n", mci->ue_mc); 400} 401 402static ssize_t mci_ce_count_show(struct device *dev, 403 struct device_attribute *mattr, 404 char *data) 405{ 406 struct mem_ctl_info *mci = to_mci(dev); 407 408 return sysfs_emit(data, "%u\n", mci->ce_mc); 409} 410 411static ssize_t mci_ce_noinfo_show(struct device *dev, 412 struct device_attribute *mattr, 413 char *data) 414{ 415 struct mem_ctl_info *mci = to_mci(dev); 416 417 return sysfs_emit(data, "%u\n", 
mci->ce_noinfo_count); 418} 419 420static ssize_t mci_ue_noinfo_show(struct device *dev, 421 struct device_attribute *mattr, 422 char *data) 423{ 424 struct mem_ctl_info *mci = to_mci(dev); 425 426 return sysfs_emit(data, "%u\n", mci->ue_noinfo_count); 427} 428 429static ssize_t mci_seconds_show(struct device *dev, 430 struct device_attribute *mattr, 431 char *data) 432{ 433 struct mem_ctl_info *mci = to_mci(dev); 434 435 return sysfs_emit(data, "%ld\n", (jiffies - mci->start_time) / HZ); 436} 437 438static ssize_t mci_ctl_name_show(struct device *dev, 439 struct device_attribute *mattr, 440 char *data) 441{ 442 struct mem_ctl_info *mci = to_mci(dev); 443 444 return sysfs_emit(data, "%s\n", mci->ctl_name); 445} 446 447static ssize_t mci_size_mb_show(struct device *dev, 448 struct device_attribute *mattr, 449 char *data) 450{ 451 struct mem_ctl_info *mci = to_mci(dev); 452 int total_pages = 0, csrow_idx, j; 453 454 for (csrow_idx = 0; csrow_idx < mci->nr_csrows; csrow_idx++) { 455 struct csrow_info *csrow = mci->csrows[csrow_idx]; 456 457 for (j = 0; j < csrow->nr_channels; j++) { 458 struct dimm_info *dimm = csrow->channels[j]->dimm; 459 460 total_pages += dimm->nr_pages; 461 } 462 } 463 464 return sysfs_emit(data, "%u\n", PAGES_TO_MiB(total_pages)); 465} 466 467static ssize_t mci_max_location_show(struct device *dev, 468 struct device_attribute *mattr, 469 char *data) 470{ 471 struct mem_ctl_info *mci = to_mci(dev); 472 int len = PAGE_SIZE; 473 char *p = data; 474 int i, n; 475 476 for (i = 0; i < mci->n_layers; i++) { 477 n = scnprintf(p, len, "%s %d ", 478 edac_layer_name[mci->layers[i].type], 479 mci->layers[i].size - 1); 480 len -= n; 481 if (len <= 0) 482 goto out; 483 484 p += n; 485 } 486 487 p += scnprintf(p, len, "\n"); 488out: 489 return p - data; 490} 491 492/* default Control file */ 493static DEVICE_ATTR(reset_counters, S_IWUSR, NULL, mci_reset_counters_store); 494 495/* default Attribute files */ 496static DEVICE_ATTR(mc_name, S_IRUGO, 
mci_ctl_name_show, NULL); 497static DEVICE_ATTR(size_mb, S_IRUGO, mci_size_mb_show, NULL); 498static DEVICE_ATTR(seconds_since_reset, S_IRUGO, mci_seconds_show, NULL); 499static DEVICE_ATTR(ue_noinfo_count, S_IRUGO, mci_ue_noinfo_show, NULL); 500static DEVICE_ATTR(ce_noinfo_count, S_IRUGO, mci_ce_noinfo_show, NULL); 501static DEVICE_ATTR(ue_count, S_IRUGO, mci_ue_count_show, NULL); 502static DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); 503static DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); 504 505/* memory scrubber attribute file */ 506static DEVICE_ATTR(sdram_scrub_rate, 0, mci_sdram_scrub_rate_show, 507 mci_sdram_scrub_rate_store); /* umode set later in is_visible */ 508 509static struct attribute *mci_attrs[] = { 510 &dev_attr_reset_counters.attr, 511 &dev_attr_mc_name.attr, 512 &dev_attr_size_mb.attr, 513 &dev_attr_seconds_since_reset.attr, 514 &dev_attr_ue_noinfo_count.attr, 515 &dev_attr_ce_noinfo_count.attr, 516 &dev_attr_ue_count.attr, 517 &dev_attr_ce_count.attr, 518 &dev_attr_max_location.attr, 519 &dev_attr_sdram_scrub_rate.attr, 520 NULL 521}; 522 523static umode_t mci_attr_is_visible(struct kobject *kobj, 524 struct attribute *attr, int idx) 525{ 526 struct device *dev = kobj_to_dev(kobj); 527 struct mem_ctl_info *mci = to_mci(dev); 528 umode_t mode = 0; 529 530 if (attr != &dev_attr_sdram_scrub_rate.attr) 531 return attr->mode; 532 if (mci->get_sdram_scrub_rate) 533 mode |= S_IRUGO; 534 if (mci->set_sdram_scrub_rate) 535 mode |= S_IWUSR; 536 return mode; 537} 538 539static const struct attribute_group mci_attr_grp = { 540 .attrs = mci_attrs, 541 .is_visible = mci_attr_is_visible, 542}; 543 544static const struct attribute_group *mci_attr_groups[] = { 545 &mci_attr_grp, 546 NULL 547}; 548 549static const struct device_type mci_attr_type = { 550 .groups = mci_attr_groups, 551}; 552 553/* 554 * Create a new Memory Controller kobject instance, 555 * mc<id> under the 'mc' directory 556 * 557 * Return: 558 * 0 Success 559 * 
!0 Failure 560 */ 561int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, 562 const struct attribute_group **groups) 563{ 564 struct dimm_info *dimm; 565 int err; 566 567 /* get the /sys/devices/system/edac subsys reference */ 568 mci->dev.type = &mci_attr_type; 569 mci->dev.parent = mci_pdev; 570 mci->dev.groups = groups; 571 dev_set_name(&mci->dev, "mc%d", mci->mc_idx); 572 dev_set_drvdata(&mci->dev, mci); 573 pm_runtime_forbid(&mci->dev); 574 575 err = device_add(&mci->dev); 576 if (err < 0) { 577 edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); 578 /* no put_device() here, free mci with _edac_mc_free() */ 579 return err; 580 } 581 582 edac_dbg(0, "device %s created\n", dev_name(&mci->dev)); 583 584 /* 585 * Create the dimm/rank devices 586 */ 587 mci_for_each_dimm(mci, dimm) { 588 /* Only expose populated DIMMs */ 589 if (!dimm->nr_pages) 590 continue; 591 592 err = edac_create_dimm_object(mci, dimm); 593 if (err) 594 goto fail; 595 } 596 597 edac_create_debugfs_nodes(mci); 598 return 0; 599 600fail: 601 edac_remove_sysfs_mci_device(mci); 602 603 return err; 604} 605 606/* 607 * remove a Memory Controller instance 608 */ 609void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) 610{ 611 struct dimm_info *dimm; 612 613 if (!device_is_registered(&mci->dev)) 614 return; 615 616 edac_dbg(0, "\n"); 617 618#ifdef CONFIG_EDAC_DEBUG 619 edac_debugfs_remove_recursive(mci->debugfs); 620#endif 621 622 mci_for_each_dimm(mci, dimm) { 623 if (!device_is_registered(&dimm->dev)) 624 continue; 625 edac_dbg(1, "unregistering device %s\n", dev_name(&dimm->dev)); 626 device_unregister(&dimm->dev); 627 } 628 629 /* only remove the device, but keep mci */ 630 device_del(&mci->dev); 631} 632 633static void mc_attr_release(struct device *dev) 634{ 635 /* 636 * There's no container structure here, as this is just the mci 637 * parent device, used to create the /sys/devices/mc sysfs node. 638 * So, there are no attributes on it. 
639 */ 640 edac_dbg(1, "device %s released\n", dev_name(dev)); 641 kfree(dev); 642} 643 644/* 645 * Init/exit code for the module. Basically, creates/removes /sys/class/rc 646 */ 647int __init edac_mc_sysfs_init(void) 648{ 649 int err; 650 651 mci_pdev = kzalloc(sizeof(*mci_pdev), GFP_KERNEL); 652 if (!mci_pdev) 653 return -ENOMEM; 654 655 mci_pdev->bus = edac_get_sysfs_subsys(); 656 mci_pdev->release = mc_attr_release; 657 mci_pdev->init_name = "mc"; 658 659 err = device_register(mci_pdev); 660 if (err < 0) { 661 edac_dbg(1, "failure: create device %s\n", dev_name(mci_pdev)); 662 put_device(mci_pdev); 663 return err; 664 } 665 666 edac_dbg(0, "device %s created\n", dev_name(mci_pdev)); 667 668 return 0; 669} 670 671void edac_mc_sysfs_exit(void) 672{ 673 device_unregister(mci_pdev); 674}