EDAC, skx_edac: Add EDAC driver for Skylake

This is an entirely new driver instead of yet another set of patches
to sb_edac.c because:

1) Mapping from PCI devices to socket/memory controller is significantly
different. Skylake scatters devices on a socket across a number of
PCI buses.
2) There is an extra level of interleaving via the "mcroute" register
that would be a little messy to squeeze into the old driver (a short
sketch of this lookup follows the list).
3) Validation is getting too expensive. Changes to sb_edac need to
be checked against Sandy Bridge, Ivy Bridge, Haswell, Broadwell and
Knights Landing.
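
As a quick illustration of point 2, here is a minimal user-space sketch of
the mcroute lookup. The helper names are invented for this note, but the
field positions are exactly the ones skx_sad_decode() below reads with
GET_BITFIELD(): once the SAD rules produce a 3-bit logical channel number
(lchan), the mcroute register maps it to one of the two memory controllers
(3-bit fields starting at bit 0) and to a channel within that controller
(2-bit fields starting at bit 18).

#include <stdint.h>

/* 3-bit field at bits [lchan*3+2 : lchan*3]: which iMC on the socket */
unsigned int mcroute_to_imc(uint32_t mcroute, int lchan)
{
	return (mcroute >> (lchan * 3)) & 0x7;
}

/* 2-bit field at bits [lchan*2+19 : lchan*2+18]: channel within that iMC */
unsigned int mcroute_to_chan(uint32_t mcroute, int lchan)
{
	return (mcroute >> (lchan * 2 + 18)) & 0x3;
}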

Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Commit 4ec656bd (parent 6040e576), authored by Tony Luck and committed by Linus Torvalds.

Changed files:

 MAINTAINERS             |    6 +
 drivers/edac/Kconfig    |    8 +
 drivers/edac/Makefile   |    1 +
 drivers/edac/skx_edac.c | 1121 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1136 insertions(+)

MAINTAINERS:

@@ -4525,6 +4525,12 @@
 S:	Maintained
 F:	drivers/edac/sb_edac.c
 
+EDAC-SKYLAKE
+M:	Tony Luck <tony.luck@intel.com>
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/skx_edac.c
+
 EDAC-XGENE
 APPLIED MICRO (APM) X-GENE SOC EDAC
 M:	Loc Ho <lho@apm.com>
drivers/edac/Kconfig:

@@ -251,6 +251,14 @@
 	  Support for error detection and correction the Intel
 	  Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
 
+config EDAC_SKX
+	tristate "Intel Skylake server Integrated MC"
+	depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+	depends on PCI_MMCONFIG
+	help
+	  Support for error detection and correction the Intel
+	  Skylake server Integrated Memory Controllers.
+
 config EDAC_MPC85XX
 	tristate "Freescale MPC83xx / MPC85xx"
 	depends on EDAC_MM_EDAC && FSL_SOC
drivers/edac/Makefile:

@@ -31,6 +31,7 @@
 obj-$(CONFIG_EDAC_I7300)	+= i7300_edac.o
 obj-$(CONFIG_EDAC_I7CORE)	+= i7core_edac.o
 obj-$(CONFIG_EDAC_SBRIDGE)	+= sb_edac.o
+obj-$(CONFIG_EDAC_SKX)		+= skx_edac.o
 obj-$(CONFIG_EDAC_E7XXX)	+= e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)	+= e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)	+= i82443bxgx_edac.o
drivers/edac/skx_edac.c (new file, 1121 lines):

/*
 * EDAC driver for Intel(R) Xeon(R) Skylake processors
 * Copyright (c) 2016, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <linux/bitmap.h>
#include <linux/math64.h>
#include <linux/mod_devicetable.h>
#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>

#include "edac_core.h"

#define SKX_REVISION	" Ver: 1.0 "

/*
 * Debug macros
 */
#define skx_printk(level, fmt, arg...)			\
	edac_printk(level, "skx", fmt, ##arg)

#define skx_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)

/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 */
#define GET_BITFIELD(v, lo, hi) \
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))

static LIST_HEAD(skx_edac_list);

static u64 skx_tolm, skx_tohm;

#define NUM_IMC		2	/* memory controllers per socket */
#define NUM_CHANNELS	3	/* channels per memory controller */
#define NUM_DIMMS	2	/* Max DIMMS per channel */

#define MASK26	0x3FFFFFF	/* Mask for 2^26 */
#define MASK29	0x1FFFFFFF	/* Mask for 2^29 */

/*
 * Each cpu socket contains some pci devices that provide global
 * information, and also some that are local to each of the two
 * memory controllers on the die.
 */
struct skx_dev {
	struct list_head list;
	u8 bus[4];
	struct pci_dev *sad_all;
	struct pci_dev *util_all;
	u32 mcroute;
	struct skx_imc {
		struct mem_ctl_info *mci;
		u8 mc;	/* system wide mc# */
		u8 lmc;	/* socket relative mc# */
		u8 src_id, node_id;
		struct skx_channel {
			struct pci_dev *cdev;
			struct skx_dimm {
				u8 close_pg;
				u8 bank_xor_enable;
				u8 fine_grain_bank;
				u8 rowbits;
				u8 colbits;
			} dimms[NUM_DIMMS];
		} chan[NUM_CHANNELS];
	} imc[NUM_IMC];
};
static int skx_num_sockets;

struct skx_pvt {
	struct skx_imc *imc;
};

struct decoded_addr {
	struct skx_dev *dev;
	u64	addr;
	int	socket;
	int	imc;
	int	channel;
	u64	chan_addr;
	int	sktways;
	int	chanways;
	int	dimm;
	int	rank;
	int	channel_rank;
	u64	rank_address;
	int	row;
	int	column;
	int	bank_address;
	int	bank_group;
};

static struct skx_dev *get_skx_dev(u8 bus, u8 idx)
{
	struct skx_dev *d;

	list_for_each_entry(d, &skx_edac_list, list) {
		if (d->bus[idx] == bus)
			return d;
	}

	return NULL;
}

enum munittype {
	CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
};

struct munit {
	u16	did;
	u16	devfn[NUM_IMC];
	u8	busidx;
	u8	per_socket;
	enum munittype mtype;
};

/*
 * List of PCI device ids that we need together with some device
 * number and function numbers to tell which memory controller the
 * device belongs to.
 */
static const struct munit skx_all_munits[] = {
	{ 0x2054, { }, 1, 1, SAD_ALL },
	{ 0x2055, { }, 1, 1, UTIL_ALL },
	{ 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
	{ 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
	{ 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
	{ 0x208e, { }, 1, 0, SAD },
	{ }
};

/*
 * We use the per-socket device 0x2016 to count how many sockets are present,
 * and to determine which PCI buses are associated with each socket. Allocate
 * and build the full list of all the skx_dev structures that we need here.
 */
static int get_all_bus_mappings(void)
{
	struct pci_dev *pdev, *prev;
	struct skx_dev *d;
	u32 reg;
	int ndev = 0;

	prev = NULL;
	for (;;) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
		if (!pdev)
			break;
		ndev++;
		d = kzalloc(sizeof(*d), GFP_KERNEL);
		if (!d) {
			pci_dev_put(pdev);
			return -ENOMEM;
		}
		pci_read_config_dword(pdev, 0xCC, &reg);
		d->bus[0] = GET_BITFIELD(reg, 0, 7);
		d->bus[1] = GET_BITFIELD(reg, 8, 15);
		d->bus[2] = GET_BITFIELD(reg, 16, 23);
		d->bus[3] = GET_BITFIELD(reg, 24, 31);
		edac_dbg(2, "busses: %x, %x, %x, %x\n",
			 d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
		list_add_tail(&d->list, &skx_edac_list);
		skx_num_sockets++;
		prev = pdev;
	}

	return ndev;
}

static int get_all_munits(const struct munit *m)
{
	struct pci_dev *pdev, *prev;
	struct skx_dev *d;
	u32 reg;
	int i = 0, ndev = 0;

	prev = NULL;
	for (;;) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
		if (!pdev)
			break;
		ndev++;
		if (m->per_socket == NUM_IMC) {
			for (i = 0; i < NUM_IMC; i++)
				if (m->devfn[i] == pdev->devfn)
					break;
			if (i == NUM_IMC)
				goto fail;
		}
		d = get_skx_dev(pdev->bus->number, m->busidx);
		if (!d)
			goto fail;

		/* Be sure that the device is enabled */
		if (unlikely(pci_enable_device(pdev) < 0)) {
			skx_printk(KERN_ERR,
				   "Couldn't enable %04x:%04x\n",
				   PCI_VENDOR_ID_INTEL, m->did);
			goto fail;
		}

		switch (m->mtype) {
		case CHAN0: case CHAN1: case CHAN2:
			pci_dev_get(pdev);
			d->imc[i].chan[m->mtype].cdev = pdev;
			break;
		case SAD_ALL:
			pci_dev_get(pdev);
			d->sad_all = pdev;
			break;
		case UTIL_ALL:
			pci_dev_get(pdev);
			d->util_all = pdev;
			break;
		case SAD:
			/*
			 * one of these devices per core, including cores
			 * that don't exist on this SKU. Ignore any that
			 * read a route table of zero; make sure all the
			 * non-zero values match.
			 */
			pci_read_config_dword(pdev, 0xB4, &reg);
			if (reg != 0) {
				if (d->mcroute == 0)
					d->mcroute = reg;
				else if (d->mcroute != reg) {
					skx_printk(KERN_ERR,
						   "mcroute mismatch\n");
					goto fail;
				}
			}
			ndev--;
			break;
		}

		prev = pdev;
	}

	return ndev;
fail:
	pci_dev_put(pdev);
	return -ENODEV;
}

const struct x86_cpu_id skx_cpuids[] = {
	{ X86_VENDOR_INTEL, 6, 0x55, 0, 0 },	/* Skylake */
	{ }
};
MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);

static u8 get_src_id(struct skx_dev *d)
{
	u32 reg;

	pci_read_config_dword(d->util_all, 0xF0, &reg);

	return GET_BITFIELD(reg, 12, 14);
}

static u8 skx_get_node_id(struct skx_dev *d)
{
	u32 reg;

	pci_read_config_dword(d->util_all, 0xF4, &reg);

	return GET_BITFIELD(reg, 0, 2);
}

static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
			 int maxval, char *name)
{
	u32 val = GET_BITFIELD(reg, lobit, hibit);

	if (val < minval || val > maxval) {
		edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg);
		return -EINVAL;
	}
	return val + add;
}

#define IS_DIMM_PRESENT(mtr)	GET_BITFIELD((mtr), 15, 15)

#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks")
#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")

static int get_width(u32 mtr)
{
	switch (GET_BITFIELD(mtr, 8, 9)) {
	case 0:
		return DEV_X4;
	case 1:
		return DEV_X8;
	case 2:
		return DEV_X16;
	}
	return DEV_UNKNOWN;
}

static int skx_get_hi_lo(void)
{
	struct pci_dev *pdev;
	u32 reg;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
	if (!pdev) {
		edac_dbg(0, "Can't get tolm/tohm\n");
		return -ENODEV;
	}

	pci_read_config_dword(pdev, 0xD0, &reg);
	skx_tolm = reg;
	pci_read_config_dword(pdev, 0xD4, &reg);
	skx_tohm = reg;
	pci_read_config_dword(pdev, 0xD8, &reg);
	skx_tohm |= (u64)reg << 32;

	pci_dev_put(pdev);
	edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm);

	return 0;
}

static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
			 struct skx_imc *imc, int chan, int dimmno)
{
	int banks = 16, ranks, rows, cols, npages;
	u64 size;

	if (!IS_DIMM_PRESENT(mtr))
		return 0;
	ranks = numrank(mtr);
	rows = numrow(mtr);
	cols = numcol(mtr);

	/*
	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
	 */
	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
	npages = MiB_TO_PAGES(size);

	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
		 imc->mc, chan, dimmno, size, npages,
		 banks, ranks, rows, cols);

	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
	imc->chan[chan].dimms[dimmno].rowbits = rows;
	imc->chan[chan].dimms[dimmno].colbits = cols;

	dimm->nr_pages = npages;
	dimm->grain = 32;
	dimm->dtype = get_width(mtr);
	dimm->mtype = MEM_DDR4;
	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
		 imc->src_id, imc->lmc, chan, dimmno);

	return 1;
}

#define SKX_GET_MTMTR(dev, reg) \
	pci_read_config_dword((dev), 0x87c, &reg)

static bool skx_check_ecc(struct pci_dev *pdev)
{
	u32 mtmtr;

	SKX_GET_MTMTR(pdev, mtmtr);

	return !!GET_BITFIELD(mtmtr, 2, 2);
}

static int skx_get_dimm_config(struct mem_ctl_info *mci)
{
	struct skx_pvt *pvt = mci->pvt_info;
	struct skx_imc *imc = pvt->imc;
	struct dimm_info *dimm;
	int i, j;
	u32 mtr, amap;
	int ndimms;

	for (i = 0; i < NUM_CHANNELS; i++) {
		ndimms = 0;
		pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
		for (j = 0; j < NUM_DIMMS; j++) {
			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
					     mci->n_layers, i, j, 0);
			pci_read_config_dword(imc->chan[i].cdev,
					      0x80 + 4*j, &mtr);
			ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j);
		}
		if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
			skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
			return -ENODEV;
		}
	}

	return 0;
}

static void skx_unregister_mci(struct skx_imc *imc)
{
	struct mem_ctl_info *mci = imc->mci;

	if (!mci)
		return;

	edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->pdev);

	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
}

static int skx_register_mci(struct skx_imc *imc)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer layers[2];
	struct pci_dev *pdev = imc->chan[0].cdev;
	struct skx_pvt *pvt;
	int rc;

	/* allocate a new MC control structure */
	layers[0].type = EDAC_MC_LAYER_CHANNEL;
	layers[0].size = NUM_CHANNELS;
	layers[0].is_virt_csrow = false;
	layers[1].type = EDAC_MC_LAYER_SLOT;
	layers[1].size = NUM_DIMMS;
	layers[1].is_virt_csrow = true;
	mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
			    sizeof(struct skx_pvt));

	if (unlikely(!mci))
		return -ENOMEM;

	edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);

	/* Associate skx_dev and mci for future usage */
	imc->mci = mci;
	pvt = mci->pvt_info;
	pvt->imc = imc;

	mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
				  imc->node_id, imc->lmc);
	mci->mtype_cap = MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "skx_edac.c";
	mci->dev_name = pci_name(imc->chan[0].cdev);
	mci->mod_ver = SKX_REVISION;
	mci->ctl_page_to_phys = NULL;

	rc = skx_get_dimm_config(mci);
	if (rc < 0)
		goto fail;

	/* record ptr to the generic device */
	mci->pdev = &pdev->dev;

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
		rc = -EINVAL;
		goto fail;
	}

	return 0;

fail:
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	imc->mci = NULL;
	return rc;
}

#define SKX_MAX_SAD 24

#define SKX_GET_SAD(d, i, reg) \
	pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg)
#define SKX_GET_ILV(d, i, reg) \
	pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg)

#define SKX_SAD_MOD3MODE(sad)	GET_BITFIELD((sad), 30, 31)
#define SKX_SAD_MOD3(sad)	GET_BITFIELD((sad), 27, 27)
#define SKX_SAD_LIMIT(sad)	(((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
#define SKX_SAD_MOD3ASMOD2(sad)	GET_BITFIELD((sad), 5, 6)
#define SKX_SAD_ATTR(sad)	GET_BITFIELD((sad), 3, 4)
#define SKX_SAD_INTERLEAVE(sad)	GET_BITFIELD((sad), 1, 2)
#define SKX_SAD_ENABLE(sad)	GET_BITFIELD((sad), 0, 0)

#define SKX_ILV_REMOTE(tgt)	(((tgt) & 8) == 0)
#define SKX_ILV_TARGET(tgt)	((tgt) & 7)

static bool skx_sad_decode(struct decoded_addr *res)
{
	struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
	u64 addr = res->addr;
	int i, idx, tgt, lchan, shift;
	u32 sad, ilv;
	u64 limit, prev_limit;
	int remote = 0;

	/* Simple sanity check for I/O space or out of range */
	if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
		edac_dbg(0, "Address %llx out of range\n", addr);
		return false;
	}

restart:
	prev_limit = 0;
	for (i = 0; i < SKX_MAX_SAD; i++) {
		SKX_GET_SAD(d, i, sad);
		limit = SKX_SAD_LIMIT(sad);
		if (SKX_SAD_ENABLE(sad)) {
			if (addr >= prev_limit && addr <= limit)
				goto sad_found;
		}
		prev_limit = limit + 1;
	}
	edac_dbg(0, "No SAD entry for %llx\n", addr);
	return false;

sad_found:
	SKX_GET_ILV(d, i, ilv);

	switch (SKX_SAD_INTERLEAVE(sad)) {
	case 0:
		idx = GET_BITFIELD(addr, 6, 8);
		break;
	case 1:
		idx = GET_BITFIELD(addr, 8, 10);
		break;
	case 2:
		idx = GET_BITFIELD(addr, 12, 14);
		break;
	case 3:
		idx = GET_BITFIELD(addr, 30, 32);
		break;
	}

	tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);

	/* If it points to another node, find it and start over */
	if (SKX_ILV_REMOTE(tgt)) {
		if (remote) {
			edac_dbg(0, "Double remote!\n");
			return false;
		}
		remote = 1;
		list_for_each_entry(d, &skx_edac_list, list) {
			if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
				goto restart;
		}
		edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
		return false;
	}

	if (SKX_SAD_MOD3(sad) == 0)
		lchan = SKX_ILV_TARGET(tgt);
	else {
		switch (SKX_SAD_MOD3MODE(sad)) {
		case 0:
			shift = 6;
			break;
		case 1:
			shift = 8;
			break;
		case 2:
			shift = 12;
			break;
		default:
			edac_dbg(0, "illegal mod3mode\n");
			return false;
		}
		switch (SKX_SAD_MOD3ASMOD2(sad)) {
		case 0:
			lchan = (addr >> shift) % 3;
			break;
		case 1:
			lchan = (addr >> shift) % 2;
			break;
		case 2:
			lchan = (addr >> shift) % 2;
			lchan = (lchan << 1) | ~lchan;
			break;
		case 3:
			lchan = ((addr >> shift) % 2) << 1;
			break;
		}
		lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
	}

	res->dev = d;
	res->socket = d->imc[0].src_id;
	res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
	res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);

	edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
		 res->addr, res->socket, res->imc, res->channel);
	return true;
}

#define SKX_MAX_TAD 8

#define SKX_GET_TADBASE(d, mc, i, reg)			\
	pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg)
#define SKX_GET_TADWAYNESS(d, mc, i, reg)		\
	pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg)
#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg)	\
	pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg)

#define SKX_TAD_BASE(b)		((u64)GET_BITFIELD((b), 12, 31) << 26)
#define SKX_TAD_SKT_GRAN(b)	GET_BITFIELD((b), 4, 5)
#define SKX_TAD_CHN_GRAN(b)	GET_BITFIELD((b), 6, 7)
#define SKX_TAD_LIMIT(b)	(((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
#define SKX_TAD_OFFSET(b)	((u64)GET_BITFIELD((b), 4, 23) << 26)
#define SKX_TAD_SKTWAYS(b)	(1 << GET_BITFIELD((b), 10, 11))
#define SKX_TAD_CHNWAYS(b)	(GET_BITFIELD((b), 8, 9) + 1)

/* which bit used for both socket and channel interleave */
static int skx_granularity[] = { 6, 8, 12, 30 };

static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
{
	addr >>= shift;
	addr /= ways;
	addr <<= shift;

	return addr | (lowbits & ((1ull << shift) - 1));
}

static bool skx_tad_decode(struct decoded_addr *res)
{
	int i;
	u32 base, wayness, chnilvoffset;
	int skt_interleave_bit, chn_interleave_bit;
	u64 channel_addr;

	for (i = 0; i < SKX_MAX_TAD; i++) {
		SKX_GET_TADBASE(res->dev, res->imc, i, base);
		SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
		if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
			goto tad_found;
	}
	edac_dbg(0, "No TAD entry for %llx\n", res->addr);
	return false;

tad_found:
	res->sktways = SKX_TAD_SKTWAYS(wayness);
	res->chanways = SKX_TAD_CHNWAYS(wayness);
	skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
	chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];

	SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
	channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);

	if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
		/* Must handle channel first, then socket */
		channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
						 res->chanways, channel_addr);
		channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
						 res->sktways, channel_addr);
	} else {
		/* Handle socket then channel. Preserve low bits from original address */
		channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
						 res->sktways, res->addr);
		channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
						 res->chanways, res->addr);
	}

	res->chan_addr = channel_addr;

	edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n",
		 res->addr, res->chan_addr, res->sktways, res->chanways);
	return true;
}

#define SKX_MAX_RIR 4

#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg)		\
	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
			      0x108 + 4 * (i), &reg)
#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg)		\
	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
			      0x120 + 16 * idx + 4 * (i), &reg)

#define SKX_RIR_VALID(b)	GET_BITFIELD((b), 31, 31)
#define SKX_RIR_LIMIT(b)	(((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
#define SKX_RIR_WAYS(b)		(1 << GET_BITFIELD((b), 28, 29))
#define SKX_RIR_CHAN_RANK(b)	GET_BITFIELD((b), 16, 19)
#define SKX_RIR_OFFSET(b)	((u64)(GET_BITFIELD((b), 2, 15) << 26))

static bool skx_rir_decode(struct decoded_addr *res)
{
	int i, idx, chan_rank;
	int shift;
	u32 rirway, rirlv;
	u64 rank_addr, prev_limit = 0, limit;

	if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
		shift = 6;
	else
		shift = 13;

	for (i = 0; i < SKX_MAX_RIR; i++) {
		SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
		limit = SKX_RIR_LIMIT(rirway);
		if (SKX_RIR_VALID(rirway)) {
			if (prev_limit <= res->chan_addr &&
			    res->chan_addr <= limit)
				goto rir_found;
		}
		prev_limit = limit;
	}
	edac_dbg(0, "No RIR entry for %llx\n", res->addr);
	return false;

rir_found:
	rank_addr = res->chan_addr >> shift;
	rank_addr /= SKX_RIR_WAYS(rirway);
	rank_addr <<= shift;
	rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);

	res->rank_address = rank_addr;
	idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);

	SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
	res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
	chan_rank = SKX_RIR_CHAN_RANK(rirlv);
	res->channel_rank = chan_rank;
	res->dimm = chan_rank / 4;
	res->rank = chan_rank % 4;

	edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
		 res->addr, res->dimm, res->rank,
		 res->channel_rank, res->rank_address);
	return true;
}

static u8 skx_close_row[] = {
	15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
};
static u8 skx_close_column[] = {
	3, 4, 5, 14, 19, 23, 24, 25, 26, 27
};
static u8 skx_open_row[] = {
	14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
};
static u8 skx_open_column[] = {
	3, 4, 5, 6, 7, 8, 9, 10, 11, 12
};
static u8 skx_open_fine_column[] = {
	3, 4, 5, 7, 8, 9, 10, 11, 12, 13
};

static int skx_bits(u64 addr, int nbits, u8 *bits)
{
	int i, res = 0;

	for (i = 0; i < nbits; i++)
		res |= ((addr >> bits[i]) & 1) << i;
	return res;
}

static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
{
	int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);

	if (do_xor)
		ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1);

	return ret;
}

static bool skx_mad_decode(struct decoded_addr *r)
{
	struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
	int bg0 = dimm->fine_grain_bank ? 6 : 13;

	if (dimm->close_pg) {
		r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
		r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
		r->column |= 0x400; /* C10 is autoprecharge, always set */
		r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
		r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
	} else {
		r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
		if (dimm->fine_grain_bank)
			r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
		else
			r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
		r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
		r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
	}
	r->row &= (1u << dimm->rowbits) - 1;

	edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n",
		 r->addr, r->row, r->column, r->bank_address,
		 r->bank_group);
	return true;
}

static bool skx_decode(struct decoded_addr *res)
{
	return skx_sad_decode(res) && skx_tad_decode(res) &&
		skx_rir_decode(res) && skx_mad_decode(res);
}

#ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr.
 * Write an address to this file to exercise the address decode
 * logic in this driver.
 */
static struct dentry *skx_test;
static u64 skx_fake_addr;

static int debugfs_u64_set(void *data, u64 val)
{
	struct decoded_addr res;

	res.addr = val;
	skx_decode(&res);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

static struct dentry *mydebugfs_create(const char *name, umode_t mode,
				       struct dentry *parent, u64 *value)
{
	return debugfs_create_file(name, mode, parent, value, &fops_u64_wo);
}

static void setup_skx_debug(void)
{
	skx_test = debugfs_create_dir("skx_edac_test", NULL);
	mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr);
}

static void teardown_skx_debug(void)
{
	debugfs_remove_recursive(skx_test);
}
#else
static void setup_skx_debug(void)
{
}

static void teardown_skx_debug(void)
{
}
#endif /*CONFIG_EDAC_DEBUG*/

static void skx_mce_output_error(struct mem_ctl_info *mci,
				 const struct mce *m,
				 struct decoded_addr *res)
{
	enum hw_event_mc_err_type tp_event;
	char *type, *optype, msg[256];
	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
	bool overflow = GET_BITFIELD(m->status, 62, 62);
	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
	bool recoverable;
	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
	u32 mscod = GET_BITFIELD(m->status, 16, 31);
	u32 errcode = GET_BITFIELD(m->status, 0, 15);
	u32 optypenum = GET_BITFIELD(m->status, 4, 6);

	recoverable = GET_BITFIELD(m->status, 56, 56);

	if (uncorrected_error) {
		if (ripv) {
			type = "FATAL";
			tp_event = HW_EVENT_ERR_FATAL;
		} else {
			type = "NON_FATAL";
			tp_event = HW_EVENT_ERR_UNCORRECTED;
		}
	} else {
		type = "CORRECTED";
		tp_event = HW_EVENT_ERR_CORRECTED;
	}

	/*
	 * According to Table 15-9 of the Intel Architecture spec vol 3A,
	 * memory errors should fit in this mask:
	 *	000f 0000 1mmm cccc (binary)
	 * where:
	 *	f = Correction Report Filtering Bit. If 1, subsequent errors
	 *	    won't be shown
	 *	mmm = error type
	 *	cccc = channel
	 * If the mask doesn't match, report an error to the parsing logic
	 */
	if (!((errcode & 0xef80) == 0x80)) {
		optype = "Can't parse: it is not a mem";
	} else {
		switch (optypenum) {
		case 0:
			optype = "generic undef request error";
			break;
		case 1:
			optype = "memory read error";
			break;
		case 2:
			optype = "memory write error";
			break;
		case 3:
			optype = "addr/cmd error";
			break;
		case 4:
			optype = "memory scrubbing error";
			break;
		default:
			optype = "reserved";
			break;
		}
	}

	snprintf(msg, sizeof(msg),
		 "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
		 overflow ? " OVERFLOW" : "",
		 (uncorrected_error && recoverable) ? " recoverable" : "",
		 mscod, errcode,
		 res->socket, res->imc, res->rank,
		 res->bank_group, res->bank_address, res->row, res->column);

	edac_dbg(0, "%s\n", msg);

	/* Call the helper to output message */
	edac_mc_handle_error(tp_event, mci, core_err_cnt,
			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
			     res->channel, res->dimm, -1,
			     optype, msg);
}

static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
			       void *data)
{
	struct mce *mce = (struct mce *)data;
	struct decoded_addr res;
	struct mem_ctl_info *mci;
	char *type;

	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
		return NOTIFY_DONE;

	/* ignore unless this is memory related with an address */
	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
		return NOTIFY_DONE;

	res.addr = mce->addr;
	if (!skx_decode(&res))
		return NOTIFY_DONE;
	mci = res.dev->imc[res.imc].mci;

	if (mce->mcgstatus & MCG_STATUS_MCIP)
		type = "Exception";
	else
		type = "Event";

	skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");

	skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
		      "Bank %d: %016Lx\n", mce->extcpu, type,
		      mce->mcgstatus, mce->bank, mce->status);
	skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
	skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
	skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);

	skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
		      "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
		      mce->time, mce->socketid, mce->apicid);

	skx_mce_output_error(mci, mce, &res);

	return NOTIFY_DONE;
}

static struct notifier_block skx_mce_dec = {
	.notifier_call = skx_mce_check_error,
};

static void skx_remove(void)
{
	int i, j;
	struct skx_dev *d, *tmp;

	edac_dbg(0, "\n");

	list_for_each_entry_safe(d, tmp, &skx_edac_list, list) {
		list_del(&d->list);
		for (i = 0; i < NUM_IMC; i++) {
			skx_unregister_mci(&d->imc[i]);
			for (j = 0; j < NUM_CHANNELS; j++)
				pci_dev_put(d->imc[i].chan[j].cdev);
		}
		pci_dev_put(d->util_all);
		pci_dev_put(d->sad_all);

		kfree(d);
	}
}

/*
 * skx_init:
 *	make sure we are running on the correct cpu model
 *	search for all the devices we need
 *	check which DIMMs are present.
 */
int __init skx_init(void)
{
	const struct x86_cpu_id *id;
	const struct munit *m;
	int rc = 0, i;
	u8 mc = 0, src_id, node_id;
	struct skx_dev *d;

	edac_dbg(2, "\n");

	id = x86_match_cpu(skx_cpuids);
	if (!id)
		return -ENODEV;

	rc = skx_get_hi_lo();
	if (rc)
		return rc;

	rc = get_all_bus_mappings();
	if (rc < 0)
		goto fail;
	if (rc == 0) {
		edac_dbg(2, "No memory controllers found\n");
		return -ENODEV;
	}

	for (m = skx_all_munits; m->did; m++) {
		rc = get_all_munits(m);
		if (rc < 0)
			goto fail;
		if (rc != m->per_socket * skx_num_sockets) {
			edac_dbg(2, "Expected %d, got %d of %x\n",
				 m->per_socket * skx_num_sockets, rc, m->did);
			rc = -ENODEV;
			goto fail;
		}
	}

	list_for_each_entry(d, &skx_edac_list, list) {
		src_id = get_src_id(d);
		node_id = skx_get_node_id(d);
		edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
		for (i = 0; i < NUM_IMC; i++) {
			d->imc[i].mc = mc++;
			d->imc[i].lmc = i;
			d->imc[i].src_id = src_id;
			d->imc[i].node_id = node_id;
			rc = skx_register_mci(&d->imc[i]);
			if (rc < 0)
				goto fail;
		}
	}

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	setup_skx_debug();

	mce_register_decode_chain(&skx_mce_dec);

	return 0;
fail:
	skx_remove();
	return rc;
}

static void __exit skx_exit(void)
{
	edac_dbg(2, "\n");
	mce_unregister_decode_chain(&skx_mce_dec);
	skx_remove();
	teardown_skx_debug();
}

module_init(skx_init);
module_exit(skx_exit);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Tony Luck");
MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");
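
A usage note on the debugfs hook above: when the driver is built with
CONFIG_EDAC_DEBUG, the decode path can be exercised without waiting for a
real machine check. Writing a physical address to
/sys/kernel/debug/skx_edac_test/addr (root-only, S_IWUSR) feeds it through
skx_decode(), and each stage (SAD, TAD, RIR, bank/row/column) reports its
intermediate result via edac_dbg(). So something like
"echo 0x12340000 > /sys/kernel/debug/skx_edac_test/addr" (the address here is
purely illustrative) should, provided EDAC debug output is enabled at a
sufficient level, leave the "socket=... imc=... channel=..." decode trail in
the kernel log.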