Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

EDAC/amd64: Merge error injection sysfs facilities

Merge the error injection sysfs facilities into the main driver and put
them inside EDAC_DEBUG ifdeffery, in order to simplify the driver and to
have all debugging/injection stuff behind a single debug build-time switch.

No functional changes.

Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Link: https://lkml.kernel.org/r/20201215110517.5215-2-bp@alien8.de

+235 -252
+3 -4
drivers/edac/Kconfig
··· 81 81 Support for error detection and correction of DRAM ECC errors on 82 82 the AMD64 families (>= K8) of memory controllers. 83 83 84 - config EDAC_AMD64_ERROR_INJECTION 85 - bool "Sysfs HW Error injection facilities" 86 - depends on EDAC_AMD64 87 - help 84 + When EDAC_DEBUG is enabled, hardware error injection facilities 85 + through sysfs are available: 86 + 88 87 Recent Opterons (Family 10h and later) provide for Memory Error 89 88 Injection into the ECC detection circuits. The amd64_edac module 90 89 allows the operator/user to inject Uncorrectable and Correctable
+1 -5
drivers/edac/Makefile
··· 44 44 obj-$(CONFIG_EDAC_X38) += x38_edac.o 45 45 obj-$(CONFIG_EDAC_I82860) += i82860_edac.o 46 46 obj-$(CONFIG_EDAC_R82600) += r82600_edac.o 47 - 48 - amd64_edac_mod-y := amd64_edac.o 49 - amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o 50 - 51 - obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o 47 + obj-$(CONFIG_EDAC_AMD64) += amd64_edac.o 52 48 53 49 obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o 54 50
+231 -4
drivers/edac/amd64_edac.c
··· 607 607 static const struct attribute_group dbg_group = { 608 608 .attrs = dbg_attrs, 609 609 }; 610 - #endif /* CONFIG_EDAC_DEBUG */ 611 610 611 + static ssize_t inject_section_show(struct device *dev, 612 + struct device_attribute *mattr, char *buf) 613 + { 614 + struct mem_ctl_info *mci = to_mci(dev); 615 + struct amd64_pvt *pvt = mci->pvt_info; 616 + return sprintf(buf, "0x%x\n", pvt->injection.section); 617 + } 618 + 619 + /* 620 + * store error injection section value which refers to one of 4 16-byte sections 621 + * within a 64-byte cacheline 622 + * 623 + * range: 0..3 624 + */ 625 + static ssize_t inject_section_store(struct device *dev, 626 + struct device_attribute *mattr, 627 + const char *data, size_t count) 628 + { 629 + struct mem_ctl_info *mci = to_mci(dev); 630 + struct amd64_pvt *pvt = mci->pvt_info; 631 + unsigned long value; 632 + int ret; 633 + 634 + ret = kstrtoul(data, 10, &value); 635 + if (ret < 0) 636 + return ret; 637 + 638 + if (value > 3) { 639 + amd64_warn("%s: invalid section 0x%lx\n", __func__, value); 640 + return -EINVAL; 641 + } 642 + 643 + pvt->injection.section = (u32) value; 644 + return count; 645 + } 646 + 647 + static ssize_t inject_word_show(struct device *dev, 648 + struct device_attribute *mattr, char *buf) 649 + { 650 + struct mem_ctl_info *mci = to_mci(dev); 651 + struct amd64_pvt *pvt = mci->pvt_info; 652 + return sprintf(buf, "0x%x\n", pvt->injection.word); 653 + } 654 + 655 + /* 656 + * store error injection word value which refers to one of 9 16-bit word of the 657 + * 16-byte (128-bit + ECC bits) section 658 + * 659 + * range: 0..8 660 + */ 661 + static ssize_t inject_word_store(struct device *dev, 662 + struct device_attribute *mattr, 663 + const char *data, size_t count) 664 + { 665 + struct mem_ctl_info *mci = to_mci(dev); 666 + struct amd64_pvt *pvt = mci->pvt_info; 667 + unsigned long value; 668 + int ret; 669 + 670 + ret = kstrtoul(data, 10, &value); 671 + if (ret < 0) 672 + return ret; 673 + 674 + if 
(value > 8) { 675 + amd64_warn("%s: invalid word 0x%lx\n", __func__, value); 676 + return -EINVAL; 677 + } 678 + 679 + pvt->injection.word = (u32) value; 680 + return count; 681 + } 682 + 683 + static ssize_t inject_ecc_vector_show(struct device *dev, 684 + struct device_attribute *mattr, 685 + char *buf) 686 + { 687 + struct mem_ctl_info *mci = to_mci(dev); 688 + struct amd64_pvt *pvt = mci->pvt_info; 689 + return sprintf(buf, "0x%x\n", pvt->injection.bit_map); 690 + } 691 + 692 + /* 693 + * store 16 bit error injection vector which enables injecting errors to the 694 + * corresponding bit within the error injection word above. When used during a 695 + * DRAM ECC read, it holds the contents of the of the DRAM ECC bits. 696 + */ 697 + static ssize_t inject_ecc_vector_store(struct device *dev, 698 + struct device_attribute *mattr, 699 + const char *data, size_t count) 700 + { 701 + struct mem_ctl_info *mci = to_mci(dev); 702 + struct amd64_pvt *pvt = mci->pvt_info; 703 + unsigned long value; 704 + int ret; 705 + 706 + ret = kstrtoul(data, 16, &value); 707 + if (ret < 0) 708 + return ret; 709 + 710 + if (value & 0xFFFF0000) { 711 + amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value); 712 + return -EINVAL; 713 + } 714 + 715 + pvt->injection.bit_map = (u32) value; 716 + return count; 717 + } 718 + 719 + /* 720 + * Do a DRAM ECC read. Assemble staged values in the pvt area, format into 721 + * fields needed by the injection registers and read the NB Array Data Port. 
722 + */ 723 + static ssize_t inject_read_store(struct device *dev, 724 + struct device_attribute *mattr, 725 + const char *data, size_t count) 726 + { 727 + struct mem_ctl_info *mci = to_mci(dev); 728 + struct amd64_pvt *pvt = mci->pvt_info; 729 + unsigned long value; 730 + u32 section, word_bits; 731 + int ret; 732 + 733 + ret = kstrtoul(data, 10, &value); 734 + if (ret < 0) 735 + return ret; 736 + 737 + /* Form value to choose 16-byte section of cacheline */ 738 + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); 739 + 740 + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); 741 + 742 + word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection); 743 + 744 + /* Issue 'word' and 'bit' along with the READ request */ 745 + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); 746 + 747 + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); 748 + 749 + return count; 750 + } 751 + 752 + /* 753 + * Do a DRAM ECC write. Assemble staged values in the pvt area and format into 754 + * fields needed by the injection registers. 
755 + */ 756 + static ssize_t inject_write_store(struct device *dev, 757 + struct device_attribute *mattr, 758 + const char *data, size_t count) 759 + { 760 + struct mem_ctl_info *mci = to_mci(dev); 761 + struct amd64_pvt *pvt = mci->pvt_info; 762 + u32 section, word_bits, tmp; 763 + unsigned long value; 764 + int ret; 765 + 766 + ret = kstrtoul(data, 10, &value); 767 + if (ret < 0) 768 + return ret; 769 + 770 + /* Form value to choose 16-byte section of cacheline */ 771 + section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); 772 + 773 + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); 774 + 775 + word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); 776 + 777 + pr_notice_once("Don't forget to decrease MCE polling interval in\n" 778 + "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" 779 + "so that you can get the error report faster.\n"); 780 + 781 + on_each_cpu(disable_caches, NULL, 1); 782 + 783 + /* Issue 'word' and 'bit' along with the READ request */ 784 + amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); 785 + 786 + retry: 787 + /* wait until injection happens */ 788 + amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); 789 + if (tmp & F10_NB_ARR_ECC_WR_REQ) { 790 + cpu_relax(); 791 + goto retry; 792 + } 793 + 794 + on_each_cpu(enable_caches, NULL, 1); 795 + 796 + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); 797 + 798 + return count; 799 + } 800 + 801 + /* 802 + * update NUM_INJ_ATTRS in case you add new members 803 + */ 804 + 805 + static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR, 806 + inject_section_show, inject_section_store); 807 + static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR, 808 + inject_word_show, inject_word_store); 809 + static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR, 810 + inject_ecc_vector_show, inject_ecc_vector_store); 811 + static DEVICE_ATTR(inject_write, S_IWUSR, 812 + NULL, inject_write_store); 813 + static DEVICE_ATTR(inject_read, S_IWUSR, 
814 + NULL, inject_read_store); 815 + 816 + static struct attribute *inj_attrs[] = { 817 + &dev_attr_inject_section.attr, 818 + &dev_attr_inject_word.attr, 819 + &dev_attr_inject_ecc_vector.attr, 820 + &dev_attr_inject_write.attr, 821 + &dev_attr_inject_read.attr, 822 + NULL 823 + }; 824 + 825 + static umode_t inj_is_visible(struct kobject *kobj, struct attribute *attr, int idx) 826 + { 827 + struct device *dev = kobj_to_dev(kobj); 828 + struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); 829 + struct amd64_pvt *pvt = mci->pvt_info; 830 + 831 + if (pvt->fam < 0x10) 832 + return 0; 833 + return attr->mode; 834 + } 835 + 836 + static const struct attribute_group inj_group = { 837 + .attrs = inj_attrs, 838 + .is_visible = inj_is_visible, 839 + }; 840 + #endif /* CONFIG_EDAC_DEBUG */ 612 841 613 842 /* 614 843 * Return the DramAddr that the SysAddr given by @sys_addr maps to. It is ··· 3698 3469 static const struct attribute_group *amd64_edac_attr_groups[] = { 3699 3470 #ifdef CONFIG_EDAC_DEBUG 3700 3471 &dbg_group, 3701 - #endif 3702 - #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION 3703 - &amd64_edac_inj_group, 3472 + &inj_group, 3704 3473 #endif 3705 3474 NULL 3706 3475 };
-4
drivers/edac/amd64_edac.h
··· 462 462 } flags; 463 463 }; 464 464 465 - #ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION 466 - extern const struct attribute_group amd64_edac_inj_group; 467 - #endif 468 - 469 465 /* 470 466 * Each of the PCI Device IDs types have their own set of hardware accessor 471 467 * functions and per device encoding/decoding logic.
-235
drivers/edac/amd64_edac_inj.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include "amd64_edac.h" 3 - 4 - static ssize_t amd64_inject_section_show(struct device *dev, 5 - struct device_attribute *mattr, 6 - char *buf) 7 - { 8 - struct mem_ctl_info *mci = to_mci(dev); 9 - struct amd64_pvt *pvt = mci->pvt_info; 10 - return sprintf(buf, "0x%x\n", pvt->injection.section); 11 - } 12 - 13 - /* 14 - * store error injection section value which refers to one of 4 16-byte sections 15 - * within a 64-byte cacheline 16 - * 17 - * range: 0..3 18 - */ 19 - static ssize_t amd64_inject_section_store(struct device *dev, 20 - struct device_attribute *mattr, 21 - const char *data, size_t count) 22 - { 23 - struct mem_ctl_info *mci = to_mci(dev); 24 - struct amd64_pvt *pvt = mci->pvt_info; 25 - unsigned long value; 26 - int ret; 27 - 28 - ret = kstrtoul(data, 10, &value); 29 - if (ret < 0) 30 - return ret; 31 - 32 - if (value > 3) { 33 - amd64_warn("%s: invalid section 0x%lx\n", __func__, value); 34 - return -EINVAL; 35 - } 36 - 37 - pvt->injection.section = (u32) value; 38 - return count; 39 - } 40 - 41 - static ssize_t amd64_inject_word_show(struct device *dev, 42 - struct device_attribute *mattr, 43 - char *buf) 44 - { 45 - struct mem_ctl_info *mci = to_mci(dev); 46 - struct amd64_pvt *pvt = mci->pvt_info; 47 - return sprintf(buf, "0x%x\n", pvt->injection.word); 48 - } 49 - 50 - /* 51 - * store error injection word value which refers to one of 9 16-bit word of the 52 - * 16-byte (128-bit + ECC bits) section 53 - * 54 - * range: 0..8 55 - */ 56 - static ssize_t amd64_inject_word_store(struct device *dev, 57 - struct device_attribute *mattr, 58 - const char *data, size_t count) 59 - { 60 - struct mem_ctl_info *mci = to_mci(dev); 61 - struct amd64_pvt *pvt = mci->pvt_info; 62 - unsigned long value; 63 - int ret; 64 - 65 - ret = kstrtoul(data, 10, &value); 66 - if (ret < 0) 67 - return ret; 68 - 69 - if (value > 8) { 70 - amd64_warn("%s: invalid word 0x%lx\n", __func__, value); 71 - return -EINVAL; 72 - } 73 - 
74 - pvt->injection.word = (u32) value; 75 - return count; 76 - } 77 - 78 - static ssize_t amd64_inject_ecc_vector_show(struct device *dev, 79 - struct device_attribute *mattr, 80 - char *buf) 81 - { 82 - struct mem_ctl_info *mci = to_mci(dev); 83 - struct amd64_pvt *pvt = mci->pvt_info; 84 - return sprintf(buf, "0x%x\n", pvt->injection.bit_map); 85 - } 86 - 87 - /* 88 - * store 16 bit error injection vector which enables injecting errors to the 89 - * corresponding bit within the error injection word above. When used during a 90 - * DRAM ECC read, it holds the contents of the of the DRAM ECC bits. 91 - */ 92 - static ssize_t amd64_inject_ecc_vector_store(struct device *dev, 93 - struct device_attribute *mattr, 94 - const char *data, size_t count) 95 - { 96 - struct mem_ctl_info *mci = to_mci(dev); 97 - struct amd64_pvt *pvt = mci->pvt_info; 98 - unsigned long value; 99 - int ret; 100 - 101 - ret = kstrtoul(data, 16, &value); 102 - if (ret < 0) 103 - return ret; 104 - 105 - if (value & 0xFFFF0000) { 106 - amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value); 107 - return -EINVAL; 108 - } 109 - 110 - pvt->injection.bit_map = (u32) value; 111 - return count; 112 - } 113 - 114 - /* 115 - * Do a DRAM ECC read. Assemble staged values in the pvt area, format into 116 - * fields needed by the injection registers and read the NB Array Data Port. 
117 - */ 118 - static ssize_t amd64_inject_read_store(struct device *dev, 119 - struct device_attribute *mattr, 120 - const char *data, size_t count) 121 - { 122 - struct mem_ctl_info *mci = to_mci(dev); 123 - struct amd64_pvt *pvt = mci->pvt_info; 124 - unsigned long value; 125 - u32 section, word_bits; 126 - int ret; 127 - 128 - ret = kstrtoul(data, 10, &value); 129 - if (ret < 0) 130 - return ret; 131 - 132 - /* Form value to choose 16-byte section of cacheline */ 133 - section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); 134 - 135 - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); 136 - 137 - word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection); 138 - 139 - /* Issue 'word' and 'bit' along with the READ request */ 140 - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); 141 - 142 - edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); 143 - 144 - return count; 145 - } 146 - 147 - /* 148 - * Do a DRAM ECC write. Assemble staged values in the pvt area and format into 149 - * fields needed by the injection registers. 
150 - */ 151 - static ssize_t amd64_inject_write_store(struct device *dev, 152 - struct device_attribute *mattr, 153 - const char *data, size_t count) 154 - { 155 - struct mem_ctl_info *mci = to_mci(dev); 156 - struct amd64_pvt *pvt = mci->pvt_info; 157 - u32 section, word_bits, tmp; 158 - unsigned long value; 159 - int ret; 160 - 161 - ret = kstrtoul(data, 10, &value); 162 - if (ret < 0) 163 - return ret; 164 - 165 - /* Form value to choose 16-byte section of cacheline */ 166 - section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section); 167 - 168 - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section); 169 - 170 - word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); 171 - 172 - pr_notice_once("Don't forget to decrease MCE polling interval in\n" 173 - "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" 174 - "so that you can get the error report faster.\n"); 175 - 176 - on_each_cpu(disable_caches, NULL, 1); 177 - 178 - /* Issue 'word' and 'bit' along with the READ request */ 179 - amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); 180 - 181 - retry: 182 - /* wait until injection happens */ 183 - amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); 184 - if (tmp & F10_NB_ARR_ECC_WR_REQ) { 185 - cpu_relax(); 186 - goto retry; 187 - } 188 - 189 - on_each_cpu(enable_caches, NULL, 1); 190 - 191 - edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); 192 - 193 - return count; 194 - } 195 - 196 - /* 197 - * update NUM_INJ_ATTRS in case you add new members 198 - */ 199 - 200 - static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR, 201 - amd64_inject_section_show, amd64_inject_section_store); 202 - static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR, 203 - amd64_inject_word_show, amd64_inject_word_store); 204 - static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR, 205 - amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store); 206 - static DEVICE_ATTR(inject_write, S_IWUSR, 207 - NULL, 
amd64_inject_write_store); 208 - static DEVICE_ATTR(inject_read, S_IWUSR, 209 - NULL, amd64_inject_read_store); 210 - 211 - static struct attribute *amd64_edac_inj_attrs[] = { 212 - &dev_attr_inject_section.attr, 213 - &dev_attr_inject_word.attr, 214 - &dev_attr_inject_ecc_vector.attr, 215 - &dev_attr_inject_write.attr, 216 - &dev_attr_inject_read.attr, 217 - NULL 218 - }; 219 - 220 - static umode_t amd64_edac_inj_is_visible(struct kobject *kobj, 221 - struct attribute *attr, int idx) 222 - { 223 - struct device *dev = kobj_to_dev(kobj); 224 - struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev); 225 - struct amd64_pvt *pvt = mci->pvt_info; 226 - 227 - if (pvt->fam < 0x10) 228 - return 0; 229 - return attr->mode; 230 - } 231 - 232 - const struct attribute_group amd64_edac_inj_group = { 233 - .attrs = amd64_edac_inj_attrs, 234 - .is_visible = amd64_edac_inj_is_visible, 235 - };