
Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS (Reliability, Availability and Serviceability) changes from Ingo Molnar:

- Add an Intel CMCI (Corrected Machine Check Interrupt) hotplug fix

- Add AMD family 16h EDAC support

- Make the AMD MCE banks code more flexible for virtual environments, where the hypervisor may report a different number of MCA banks than real hardware does

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
amd64_edac: Add Family 16h support
x86/mce: Rework cmci_rediscover() to play well with CPU hotplug
x86, MCE, AMD: Use MCG_CAP MSR to find out number of banks on AMD
x86, MCE, AMD: Replace shared_bank array with is_shared_bank() helper
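
The last two commits make the AMD bank-handling code stop assuming a fixed
number of MCA banks. That count is not a compile-time constant: the CPU (or,
under virtualization, the hypervisor) reports it in bits 7:0 of the
IA32_MCG_CAP MSR, and the core MCE code reads it into mca_cfg.banks. A minimal
user-space sketch of that field extraction (the sample MCG_CAP value is made
up for illustration):

#include <stdint.h>
#include <stdio.h>

#define MCG_BANKCNT_MASK 0xff /* bits 7:0 of IA32_MCG_CAP: bank count */

static unsigned int mca_bank_count(uint64_t mcg_cap)
{
	return mcg_cap & MCG_BANKCNT_MASK;
}

int main(void)
{
	/* sample value: 9 banks, CMCI (bit 10) and TES (bit 11) capable */
	uint64_t mcg_cap = 0xc09;

	printf("MCA banks: %u\n", mca_bank_count(mcg_cap));
	return 0;
}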

8 files changed, +103 -41

arch/x86/include/asm/mce.h (+2 -2)

@@ -146,13 +146,13 @@
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
-void cmci_rediscover(int dying);
+void cmci_rediscover(void);
 void cmci_recheck(void);
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
 static inline void cmci_clear(void) {}
 static inline void cmci_reenable(void) {}
-static inline void cmci_rediscover(int dying) {}
+static inline void cmci_rediscover(void) {}
 static inline void cmci_recheck(void) {}
 #endif

arch/x86/kernel/amd_nb.c (+2 -1)

@@ -20,12 +20,14 @@
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{}
 };
 EXPORT_SYMBOL(amd_nb_misc_ids);

 static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
 	{}
 };
@@ -81,7 +83,6 @@
 			next_northbridge(link, amd_nb_link_ids);
 	}

-	/* some CPU families (e.g. family 0x11) do not support GART */
 	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
 	    boot_cpu_data.x86 == 0x15)
 		amd_northbridges.flags |= AMD_NB_GART;

arch/x86/kernel/cpu/mcheck/mce.c (+1 -1)

@@ -2358,7 +2358,7 @@

 	if (action == CPU_POST_DEAD) {
 		/* intentionally ignoring frozen here */
-		cmci_rediscover(cpu);
+		cmci_rediscover();
 	}

 	return NOTIFY_OK;

arch/x86/kernel/cpu/mcheck/mce_amd.c (+24 -15)

@@ -33,7 +33,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>

-#define NR_BANKS 6
 #define NR_BLOCKS 9
 #define THRESHOLD_MAX 0xFFF
 #define INT_TYPE_APIC 0x00020000
@@ -56,12 +55,7 @@
 	"execution_unit",
 };

-static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
-
-static unsigned char shared_bank[NR_BANKS] = {
-	0, 0, 0, 0, 1
-};
-
+static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */

 static void amd_threshold_interrupt(void);
@@ -72,6 +66,12 @@
 	int lvt_off;
 	u16 old_limit;
 };
+
+static inline bool is_shared_bank(int bank)
+{
+	/* Bank 4 is for northbridge reporting and is thus shared */
+	return (bank == 4);
+}

 static const char * const bank4_names(struct threshold_block *b)
 {
@@ -214,7 +214,7 @@
 	unsigned int bank, block;
 	int offset = -1;

-	for (bank = 0; bank < NR_BANKS; ++bank) {
+	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
 			if (block == 0)
 				address = MSR_IA32_MC0_MISC + bank * 4;
@@ -276,7 +276,7 @@
 	mce_setup(&m);

 	/* assume first bank caused it */
-	for (bank = 0; bank < NR_BANKS; ++bank) {
+	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -467,7 +467,7 @@
 	u32 low, high;
 	int err;

-	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
+	if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
 		return 0;

 	if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
@@ -575,7 +575,7 @@
 	const char *name = th_names[bank];
 	int err = 0;

-	if (shared_bank[bank]) {
+	if (is_shared_bank(bank)) {
 		nb = node_to_amd_nb(amd_get_nb_id(cpu));

 		/* threshold descriptor already initialized on this node? */
@@ -609,7 +609,7 @@

 	per_cpu(threshold_banks, cpu)[bank] = b;

-	if (shared_bank[bank]) {
+	if (is_shared_bank(bank)) {
 		atomic_set(&b->cpus, 1);

 		/* nb is already initialized, see above */
@@ -635,9 +635,17 @@
 static __cpuinit int threshold_create_device(unsigned int cpu)
 {
 	unsigned int bank;
+	struct threshold_bank **bp;
 	int err = 0;

-	for (bank = 0; bank < NR_BANKS; ++bank) {
+	bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
+		     GFP_KERNEL);
+	if (!bp)
+		return -ENOMEM;
+
+	per_cpu(threshold_banks, cpu) = bp;
+
+	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		err = threshold_create_bank(cpu, bank);
@@ -691,7 +699,7 @@
 	if (!b->blocks)
 		goto free_out;

-	if (shared_bank[bank]) {
+	if (is_shared_bank(bank)) {
 		if (!atomic_dec_and_test(&b->cpus)) {
 			__threshold_remove_blocks(b);
 			per_cpu(threshold_banks, cpu)[bank] = NULL;
@@ -719,11 +727,12 @@
 {
 	unsigned int bank;

-	for (bank = 0; bank < NR_BANKS; ++bank) {
+	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		threshold_remove_bank(cpu, bank);
 	}
+	kfree(per_cpu(threshold_banks, cpu));
 }

 /* get notified when a cpu comes on/off */

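The threshold_create_device()/threshold_remove_device() hunks above follow a
common pattern: a compile-time NR_BANKS array becomes a per-CPU pointer table
sized from the runtime bank count. A self-contained user-space sketch of the
same pattern (the types are simplified stand-ins and the bank_map value is
invented):

#include <stdio.h>
#include <stdlib.h>

struct threshold_bank { int bank; };	/* stand-in for the kernel struct */

static unsigned int banks = 6;		/* plays the role of mca_cfg.banks */
static unsigned char bank_map = 0x13;	/* banks 0, 1 and 4 present */

int main(void)
{
	/* counterpart of the new kzalloc() in threshold_create_device() */
	struct threshold_bank **bp = calloc(banks, sizeof(*bp));
	unsigned int bank;

	if (!bp)
		return 1;		/* the kernel returns -ENOMEM here */

	for (bank = 0; bank < banks; ++bank) {
		if (!(bank_map & (1 << bank)))
			continue;	/* same skip as the kernel loops */
		bp[bank] = calloc(1, sizeof(*bp[bank]));
		if (!bp[bank])
			continue;
		bp[bank]->bank = bank;
		printf("created threshold bank %u%s\n", bank,
		       bank == 4 ? " (shared: northbridge)" : "");
	}

	for (bank = 0; bank < banks; ++bank)
		free(bp[bank]);
	free(bp);			/* counterpart of the new kfree() */
	return 0;
}
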
arch/x86/kernel/cpu/mcheck/mce_intel.c (+5 -20)

@@ -285,39 +285,24 @@
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
 }

-static long cmci_rediscover_work_func(void *arg)
+static void cmci_rediscover_work_func(void *arg)
 {
 	int banks;

 	/* Recheck banks in case CPUs don't all have the same */
 	if (cmci_supported(&banks))
 		cmci_discover(banks);
-
-	return 0;
 }

-/*
- * After a CPU went down cycle through all the others and rediscover
- * Must run in process context.
- */
-void cmci_rediscover(int dying)
+/* After a CPU went down cycle through all the others and rediscover */
+void cmci_rediscover(void)
 {
-	int cpu, banks;
+	int banks;

 	if (!cmci_supported(&banks))
 		return;

-	for_each_online_cpu(cpu) {
-		if (cpu == dying)
-			continue;
-
-		if (cpu == smp_processor_id()) {
-			cmci_rediscover_work_func(NULL);
-			continue;
-		}
-
-		work_on_cpu(cpu, cmci_rediscover_work_func, NULL);
-	}
+	on_each_cpu(cmci_rediscover_work_func, NULL, 1);
 }

 /*

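The rediscover rework drops the work_on_cpu() loop, which required process
context and had to special-case the dying CPU, in favor of a single
on_each_cpu() broadcast. The callback now matches smp_call_func_t (void
return), and since cmci_discover() only takes a raw spinlock and never
sleeps, it is safe to run from the IPI handler. A minimal module sketch of
the on_each_cpu() pattern, with hypothetical demo_* names:

#include <linux/module.h>
#include <linux/smp.h>

/* hypothetical demo, not from the patch; must not sleep (IPI context) */
static void demo_func(void *info)
{
	pr_info("demo: running on CPU %d\n", smp_processor_id());
}

static int __init demo_init(void)
{
	/* third argument == 1: wait until every CPU has run the callback */
	on_each_cpu(demo_func, NULL, 1);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
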
drivers/edac/amd64_edac.c (+64 -1)

@@ -98,6 +98,7 @@
  *
  * F15h: we select which DCT we access using F1x10C[DctCfgSel]
  *
+ * F16h: has only 1 DCT
  */
 static int k8_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
 			       const char *func)
@@ -340,6 +341,27 @@
 		base_bits = GENMASK(21, 31) | GENMASK(9, 15);
 		mask_bits = GENMASK(21, 29) | GENMASK(9, 15);
 		addr_shift = 4;
+
+	/*
+	 * F16h needs two addr_shift values: 8 for high and 6 for low
+	 * (cf. F16h BKDG).
+	 */
+	} else if (boot_cpu_data.x86 == 0x16) {
+		csbase = pvt->csels[dct].csbases[csrow];
+		csmask = pvt->csels[dct].csmasks[csrow >> 1];
+
+		*base  = (csbase & GENMASK(5, 15)) << 6;
+		*base |= (csbase & GENMASK(19, 30)) << 8;
+
+		*mask = ~0ULL;
+		/* poke holes for the csmask */
+		*mask &= ~((GENMASK(5, 15) << 6) |
+			   (GENMASK(19, 30) << 8));
+
+		*mask |= (csmask & GENMASK(5, 15)) << 6;
+		*mask |= (csmask & GENMASK(19, 30)) << 8;
+
+		return;
 	} else {
 		csbase = pvt->csels[dct].csbases[csrow];
 		csmask = pvt->csels[dct].csmasks[csrow >> 1];
@@ -1150,6 +1172,21 @@
 	return ddr3_cs_size(cs_mode, false);
 }

+/*
+ * F16h has only limited cs_modes
+ */
+static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct,
+				   unsigned cs_mode)
+{
+	WARN_ON(cs_mode > 12);
+
+	if (cs_mode == 6 || cs_mode == 8 ||
+	    cs_mode == 9 || cs_mode == 12)
+		return -1;
+	else
+		return ddr3_cs_size(cs_mode, false);
+}
+
 static void read_dram_ctl_register(struct amd64_pvt *pvt)
 {

@@ -1587,6 +1624,17 @@
 			.read_dct_pci_cfg = f15_read_dct_pci_cfg,
 		}
 	},
+	[F16_CPUS] = {
+		.ctl_name = "F16h",
+		.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
+		.f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
+		.ops = {
+			.early_channel_count = f1x_early_channel_count,
+			.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
+			.dbam_to_cs = f16_dbam_to_chip_select,
+			.read_dct_pci_cfg = f10_read_dct_pci_cfg,
+		}
+	},
 };

 /*
@@ -1939,7 +1987,9 @@

 	if (c->x86 >= 0x10) {
 		amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
-		amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);
+		if (c->x86 != 0x16)
+			/* F16h has only DCT0 */
+			amd64_read_dct_pci_cfg(pvt, DBAM1, &pvt->dbam1);

 		/* F10h, revD and later can do x8 ECC too */
 		if ((c->x86 > 0x10 || c->x86_model > 7) && tmp & BIT(25))
@@ -2356,6 +2406,11 @@
 		pvt->ops = &amd64_family_types[F15_CPUS].ops;
 		break;

+	case 0x16:
+		fam_type = &amd64_family_types[F16_CPUS];
+		pvt->ops = &amd64_family_types[F16_CPUS].ops;
+		break;
+
 	default:
 		amd64_err("Unsupported family!\n");
 		return NULL;
@@ -2576,6 +2631,14 @@
 	{
 		.vendor = PCI_VENDOR_ID_AMD,
 		.device = PCI_DEVICE_ID_AMD_15H_NB_F2,
+		.subvendor = PCI_ANY_ID,
+		.subdevice = PCI_ANY_ID,
+		.class = 0,
+		.class_mask = 0,
+	},
+	{
+		.vendor = PCI_VENDOR_ID_AMD,
+		.device = PCI_DEVICE_ID_AMD_16H_NB_F2,
 		.subvendor = PCI_ANY_ID,
 		.subdevice = PCI_ANY_ID,
 		.class = 0,

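A note on the F16h base/mask hunk: GENMASK in this driver is a local
(LO, HI) helper covering bits LO through HI inclusive (visible in the F15h
branch above, e.g. GENMASK(21, 31)), unlike the later kernel-wide
GENMASK(h, l). The chip-select base is stitched together from two DCSB
fields with different shifts. A user-space sketch of the *base computation,
using an invented register value:

#include <stdint.h>
#include <stdio.h>

/* same (LO, HI) semantics as the driver's local macro */
#define GENMASK(LO, HI) (((1ULL << ((HI) - (LO) + 1)) - 1) << (LO))

int main(void)
{
	uint64_t csbase = 0x00240560;	/* invented DCSB register value */
	uint64_t base;

	/* low field: bits 5..15 shifted by 6; high field: bits 19..30 by 8 */
	base  = (csbase & GENMASK(5, 15)) << 6;
	base |= (csbase & GENMASK(19, 30)) << 8;

	printf("cs base: 0x%llx\n", (unsigned long long)base);
	return 0;
}
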
drivers/edac/amd64_edac.h (+3 -1)

@@ -172,7 +172,8 @@
  */
 #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
-
+#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
+#define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532

 /*
  * Function 1 - Address Map
@@ -296,6 +297,7 @@
 	K8_CPUS = 0,
 	F10_CPUS,
 	F15_CPUS,
+	F16_CPUS,
 	NUM_FAMILIES,
 };

include/linux/pci_ids.h (+2)

@@ -524,6 +524,8 @@
 #define PCI_DEVICE_ID_AMD_15H_NB_F3 0x1603
 #define PCI_DEVICE_ID_AMD_15H_NB_F4 0x1604
 #define PCI_DEVICE_ID_AMD_15H_NB_F5 0x1605
+#define PCI_DEVICE_ID_AMD_16H_NB_F3 0x1533
+#define PCI_DEVICE_ID_AMD_16H_NB_F4 0x1534
 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
 #define PCI_DEVICE_ID_AMD_LANCE 0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001