Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (32 commits)
x86: add MAP_STACK mmap flag
x86: fix section mismatch warning - spp_getpage()
x86: change init_gdt to update the gdt via write_gdt, rather than a direct write.
x86-64: fix overlap of modules and fixmap areas
x86, geode-mfgpt: check IRQ before using MFGPT as clocksource
x86, acpi: cleanup, temp_stack is used only when CONFIG_SMP is set
x86: fix spin_is_contended()
x86, nmi: clean up NMI watchdog failure message
x86, NMI: fix watchdog failure message
x86: fix /proc/meminfo DirectMap
x86: fix readb() et al compile error with gcc-3.2.3
arch/x86/Kconfig: clean up, experimental adjustment
x86: invalidate caches before going into suspend
x86, perfctr: don't use CCCR_OVF_PMI1 on Pentium 4Ds
x86, AMD IOMMU: initialize dma_ops after sysfs registration
x86, AMD IOMMU: cleanup: replace LOW_U32 macro with generic lower_32_bits
x86, AMD IOMMU: initialize device table properly
x86, AMD IOMMU: use status bit instead of memory write-back for completion wait
x86: silence mmconfig printk
x86, msr: fix NULL pointer deref due to msr_open on nonexistent CPUs
...

+300 -150
+3 -3
arch/x86/Kconfig
···
           local memory controller of the CPU and add some more
           NUMA awareness to the kernel.
 
-          For i386 this is currently highly experimental and should be only
+          For 32-bit this is currently highly experimental and should be only
           used for kernel development. It might also cause boot failures.
-          For x86_64 this is recommended on all multiprocessor Opteron systems.
+          For 64-bit this is recommended on all multiprocessor Opteron systems.
           If the system is EM64T, you should say N unless your system is
           EM64T NUMA.
···
           strongly in flux, so no good recommendation can be made.
 
 config CRASH_DUMP
-        bool "kernel crash dumps (EXPERIMENTAL)"
+        bool "kernel crash dumps"
         depends on X86_64 || (X86_32 && HIGHMEM)
         help
           Generate crash dump after being started by kexec.
+8
arch/x86/boot/boot.h
···
 #include <linux/edd.h>
 #include <asm/boot.h>
 #include <asm/setup.h>
+#include "bitops.h"
+#include <asm/cpufeature.h>
 
 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
···
 int cmdline_find_option_bool(const char *option);
 
 /* cpu.c, cpucheck.c */
+struct cpu_features {
+        int level;              /* Family, or 64 for x86-64 */
+        int model;
+        u32 flags[NCAPINTS];
+};
+extern struct cpu_features cpu;
 int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
 int validate_cpu(void);
-3
arch/x86/boot/cpu.c
···
  */
 
 #include "boot.h"
-#include "bitops.h"
-#include <asm/cpufeature.h>
-
 #include "cpustr.h"
 
 static char *cpu_name(int level)
+1 -9
arch/x86/boot/cpucheck.c
···
 
 #ifdef _SETUP
 # include "boot.h"
-# include "bitops.h"
 #endif
 #include <linux/types.h>
-#include <asm/cpufeature.h>
 #include <asm/processor-flags.h>
 #include <asm/required-features.h>
 #include <asm/msr-index.h>
 
-struct cpu_features {
-        int level;              /* Family, or 64 for x86-64 */
-        int model;
-        u32 flags[NCAPINTS];
-};
-
-static struct cpu_features cpu;
+struct cpu_features cpu;
 static u32 cpu_vendor[3];
 static u32 err_flags[NCAPINTS];
+5
arch/x86/boot/main.c
···
  */
 static void query_ist(void)
 {
+        /* Some older BIOSes apparently crash on this call, so filter
+           it from machines too old to have SpeedStep at all. */
+        if (cpu.level < 6)
+                return;
+
         asm("int $0x15"
             : "=a" (boot_params.ist_info.signature),
               "=b" (boot_params.ist_info.command),
+15 -1
arch/x86/kernel/acpi/boot.c
···
 #warning ACPI uses CMPXCHG, i486 and later hardware
 #endif
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
···
 struct acpi_mcfg_allocation *pci_mmcfg_config;
 int pci_mmcfg_config_num;
 
+static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
+{
+        if (!strcmp(mcfg->header.oem_id, "SGI"))
+                acpi_mcfg_64bit_base_addr = TRUE;
+
+        return 0;
+}
+
 int __init acpi_parse_mcfg(struct acpi_table_header *header)
 {
         struct acpi_table_mcfg *mcfg;
···
         }
 
         memcpy(pci_mmcfg_config, &mcfg[1], config_size);
+
+        acpi_mcfg_oem_check(mcfg);
+
         for (i = 0; i < pci_mmcfg_config_num; ++i) {
-                if (pci_mmcfg_config[i].address > 0xFFFFFFFF) {
+                if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) &&
+                    !acpi_mcfg_64bit_base_addr) {
                         printk(KERN_ERR PREFIX
                                "MMCONFIG not in low 4GB of memory\n");
                         kfree(pci_mmcfg_config);
+1 -1
arch/x86/kernel/acpi/sleep.c
···
 /* address in low memory of the wakeup routine. */
 static unsigned long acpi_realmode;
 
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
 static char temp_stack[10240];
 #endif
+11 -8
arch/x86/kernel/amd_iommu.c
···
  */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-        int ret;
+        int ret, ready = 0;
+        unsigned status = 0;
         struct iommu_cmd cmd;
-        volatile u64 ready = 0;
-        unsigned long ready_phys = virt_to_phys(&ready);
         unsigned long i = 0;
 
         memset(&cmd, 0, sizeof(cmd));
-        cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
-        cmd.data[1] = upper_32_bits(ready_phys);
-        cmd.data[2] = 1; /* value written to 'ready' */
+        cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
         CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
 
         iommu->need_sync = 0;
···
 
         while (!ready && (i < EXIT_LOOP_COUNT)) {
                 ++i;
-                cpu_relax();
+                /* wait for the bit to become one */
+                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+                ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
         }
+
+        /* set bit back to zero */
+        status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+        writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
 
         if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
                 printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
···
         address &= PAGE_MASK;
         CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
         cmd.data[1] |= domid;
-        cmd.data[2] = LOW_U32(address);
+        cmd.data[2] = lower_32_bits(address);
         cmd.data[3] = upper_32_bits(address);
         if (s) /* size bit - we flush more than one 4kb page */
                 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+22 -4
arch/x86/kernel/amd_iommu_init.c
···
 }
 
 /*
+ * Init the device table to not allow DMA access for devices and
+ * suppress all page faults
+ */
+static void init_device_table(void)
+{
+        u16 devid;
+
+        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+                set_dev_entry_bit(devid, DEV_ENTRY_VALID);
+                set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+                set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
+        }
+}
+
+/*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
  */
···
         if (amd_iommu_pd_alloc_bitmap == NULL)
                 goto free;
 
+        /* init the device table */
+        init_device_table();
+
         /*
          * let all alias entries point to itself
          */
···
         if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
                 goto free;
 
-        ret = amd_iommu_init_dma_ops();
-        if (ret)
-                goto free;
-
         ret = sysdev_class_register(&amd_iommu_sysdev_class);
         if (ret)
                 goto free;
 
         ret = sysdev_register(&device_amd_iommu);
+        if (ret)
+                goto free;
+
+        ret = amd_iommu_init_dma_ops();
         if (ret)
                 goto free;
-8
arch/x86/kernel/apic_32.c
···
         }
 }
 
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
-
 void __cpuinit generic_processor_info(int apicid, int version)
 {
         int cpu;
···
         if (num_processors >= NR_CPUS) {
                 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
                         " Processor ignored.\n", NR_CPUS);
-                return;
-        }
-
-        if (num_processors >= maxcpus) {
-                printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
-                        " Processor ignored.\n", maxcpus);
                 return;
         }
-7
arch/x86/kernel/apic_64.c
···
 
 unsigned long mp_lapic_addr;
 
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
 /*
  * Get the LAPIC version
  */
···
         if (num_processors >= NR_CPUS) {
                 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
                         " Processor ignored.\n", NR_CPUS);
-                return;
-        }
-
-        if (num_processors >= maxcpus) {
-                printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
-                        " Processor ignored.\n", maxcpus);
                 return;
         }
+7 -1
arch/x86/kernel/cpu/perfctr-watchdog.c
···
                 perfctr_msr = MSR_P4_IQ_PERFCTR1;
                 evntsel_msr = MSR_P4_CRU_ESCR0;
                 cccr_msr = MSR_P4_IQ_CCCR1;
-                cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
+
+                /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
+                if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
+                        cccr_val = P4_CCCR_OVF_PMI0;
+                else
+                        cccr_val = P4_CCCR_OVF_PMI1;
+                cccr_val |= P4_CCCR_ESCR_SELECT(4);
         }
 
         evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+1 -1
arch/x86/kernel/genx2apic_uv_x.c
···
 
 enum map_type {map_wb, map_uc};
 
-static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
+static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
 {
         unsigned long bytes, paddr;
+1
arch/x86/kernel/head64.c
···
         BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
         BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
                         (__START_KERNEL & PGDIR_MASK)));
+        BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
 
         /* clear bss before set_intr_gate with early_idt_handler */
         clear_bss();
+24
arch/x86/kernel/hpet.c
···
 int __init hpet_enable(void)
 {
         unsigned long id;
+        int i;
 
         if (!is_hpet_capable())
                 return 0;
···
          * Read the period and check for a sane value:
          */
         hpet_period = hpet_readl(HPET_PERIOD);
+
+        /*
+         * AMD SB700 based systems with spread spectrum enabled use a
+         * SMM based HPET emulation to provide proper frequency
+         * setting. The SMM code is initialized with the first HPET
+         * register access and takes some time to complete. During
+         * this time the config register reads 0xffffffff. We check
+         * for max. 1000 loops whether the config register reads a non
+         * 0xffffffff value to make sure that HPET is up and running
+         * before we go further. A counting loop is safe, as the HPET
+         * access takes thousands of CPU cycles. On non SB700 based
+         * machines this check is only done once and has no side
+         * effects.
+         */
+        for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) {
+                if (i == 1000) {
+                        printk(KERN_WARNING
+                               "HPET config register value = 0xFFFFFFFF. "
+                               "Disabling HPET\n");
+                        goto out_nohpet;
+                }
+        }
+
         if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD)
                 goto out_nohpet;
+37 -15
arch/x86/kernel/mfgpt_32.c
···
 #include <linux/module.h>
 #include <asm/geode.h>
 
+#define MFGPT_DEFAULT_IRQ       7
+
 static struct mfgpt_timer_t {
         unsigned int avail:1;
 } mfgpt_timers[MFGPT_MAX_TIMERS];
···
 }
 EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
 
-int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
+int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable)
 {
-        u32 val, dummy;
-        int offset;
+        u32 zsel, lpc, dummy;
+        int shift;
 
         if (timer < 0 || timer >= MFGPT_MAX_TIMERS)
                 return -EIO;
 
-        if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+        /*
+         * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA
+         * is using the same CMP of the timer's Siamese twin, the IRQ is set to
+         * 2, and we mustn't use nor change it.
+         * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the
+         * IRQ of the 1st. This can only happen if forcing an IRQ, calling this
+         * with *irq==0 is safe. Currently there _are_ no 2 drivers.
+         */
+        rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
+        shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
+        if (((zsel >> shift) & 0xF) == 2)
                 return -EIO;
 
-        rdmsr(MSR_PIC_ZSEL_LOW, val, dummy);
+        /* Choose IRQ: if none supplied, keep IRQ already set or use default */
+        if (!*irq)
+                *irq = (zsel >> shift) & 0xF;
+        if (!*irq)
+                *irq = MFGPT_DEFAULT_IRQ;
 
-        offset = (timer % 4) * 4;
+        /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */
+        if (*irq < 1 || *irq == 2 || *irq > 15)
+                return -EIO;
+        rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy);
+        if (lpc & (1 << *irq))
+                return -EIO;
 
-        val &= ~((0xF << offset) | (0xF << (offset + 16)));
-
+        /* All chosen and checked - go for it */
+        if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable))
+                return -EIO;
         if (enable) {
-                val |= (irq & 0x0F) << (offset);
-                val |= (irq & 0x0F) << (offset + 16);
+                zsel = (zsel & ~(0xF << shift)) | (*irq << shift);
+                wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy);
         }
 
-        wrmsr(MSR_PIC_ZSEL_LOW, val, dummy);
         return 0;
 }
···
 static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN;
 static u16 mfgpt_event_clock;
 
-static int irq = 7;
+static int irq;
 static int __init mfgpt_setup(char *str)
 {
         get_option(&str, &irq);
···
         mfgpt_event_clock = timer;
 
         /* Set up the IRQ on the MFGPT side */
-        if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) {
+        if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) {
                 printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq);
                 return -EIO;
         }
···
                                         &mfgpt_clockevent);
 
         printk(KERN_INFO
-                "mfgpt-timer: registering the MFGPT timer as a clock event.\n");
+               "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n",
+                timer, irq);
         clockevents_register_device(&mfgpt_clockevent);
 
         return 0;
 
 err:
-        geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq);
+        geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq);
         printk(KERN_ERR
                 "mfgpt-timer: Unable to set up the MFGPT clock source\n");
         return -EIO;
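The shift computation in the patched geode_mfgpt_set_irq() reflects how MSR_PIC_ZSEL_LOW appears to be laid out: eight 4-bit IRQ-routing nibbles, with CMP1 for timers 0-3 in bits 0-15 and CMP2 for timers 0-3 in bits 16-31 (this layout is inferred from the old and new code above, not restated from a datasheet). A stand-alone sketch of the nibble arithmetic, with made-up register values and MFGPT_CMP1/MFGPT_CMP2 redefined only for the example:

#include <stdio.h>

/* Mirror of the ZSEL nibble math used above.  Layout assumption (from the
 * diff): four CMP1 routing nibbles in bits 0-15, four CMP2 nibbles in
 * bits 16-31. */
#define MFGPT_CMP1 0
#define MFGPT_CMP2 1

static int zsel_shift(int timer, int cmp)
{
        return ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4;
}

int main(void)
{
        unsigned int zsel = 0;             /* hypothetical: nothing routed yet */
        int timer = 1, irq = 7;
        int shift = zsel_shift(timer, MFGPT_CMP2);   /* (4 + 1) * 4 = 20 */

        if (((zsel >> shift) & 0xF) == 0)  /* nibble empty: fall back to default IRQ */
                zsel = (zsel & ~(0xFu << shift)) | ((unsigned int)irq << shift);

        printf("zsel = 0x%08x, timer %d CMP2 -> IRQ %u\n",
               zsel, timer, (zsel >> shift) & 0xF);
        return 0;
}

A nibble value of 2 is the case the patch refuses to touch (VSA owns the line), and a nibble routed to LPC is rejected via MSR_PIC_IRQM_LPC.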
+1 -1
arch/x86/kernel/msr.c
···
                 ret = -EIO;     /* MSR not supported */
 out:
         unlock_kernel();
-        return 0;
+        return ret;
 }
 
 /*
+19 -9
arch/x86/kernel/nmi.c
···
 }
 #endif
 
+static void report_broken_nmi(int cpu, int *prev_nmi_count)
+{
+        printk(KERN_CONT "\n");
+
+        printk(KERN_WARNING
+                "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
+                        cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
+
+        printk(KERN_WARNING
+                "Please report this to bugzilla.kernel.org,\n");
+        printk(KERN_WARNING
+                "and attach the output of the 'dmesg' command.\n");
+
+        per_cpu(wd_enabled, cpu) = 0;
+        atomic_dec(&nmi_active);
+}
+
 int __init check_nmi_watchdog(void)
 {
         unsigned int *prev_nmi_count;
···
         for_each_online_cpu(cpu) {
                 if (!per_cpu(wd_enabled, cpu))
                         continue;
-                if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-                        printk(KERN_WARNING "WARNING: CPU#%d: NMI "
-                                "appears to be stuck (%d->%d)!\n",
-                                cpu,
-                                prev_nmi_count[cpu],
-                                get_nmi_count(cpu));
-                        per_cpu(wd_enabled, cpu) = 0;
-                        atomic_dec(&nmi_active);
-                }
+                if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
+                        report_broken_nmi(cpu, prev_nmi_count);
         }
         endflag = 1;
         if (!atomic_read(&nmi_active)) {
+2 -3
arch/x86/kernel/process_32.c
···
 {
         /* This must be done before dead CPU ack */
         cpu_exit_clear();
-        wbinvd();
         mb();
         /* Ack it */
         __get_cpu_var(cpu_state) = CPU_DEAD;
···
          * With physical CPU hotplug, we should halt the cpu
          */
         local_irq_disable();
-        while (1)
-                halt();
+        /* mask all interrupts, flush any and all caches, and halt */
+        wbinvd_halt();
 }
 #else
 static inline void play_dead(void)
+2 -3
arch/x86/kernel/process_64.c
···
 static inline void play_dead(void)
 {
         idle_task_exit();
-        wbinvd();
         mb();
         /* Ack it */
         __get_cpu_var(cpu_state) = CPU_DEAD;
 
         local_irq_disable();
-        while (1)
-                halt();
+        /* mask all interrupts, flush any and all caches, and halt */
+        wbinvd_halt();
 }
 #else
 static inline void play_dead(void)
+1 -1
arch/x86/kernel/setup.c
···
  * @size: Size of the crashkernel memory to reserve.
  * Returns the base address on success, and -1ULL on failure.
  */
-unsigned long long find_and_reserve_crashkernel(unsigned long long size)
+unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
 {
         const unsigned long long alignment = 16<<20;    /* 16M */
         unsigned long long start = 0LL;
+10 -1
arch/x86/kernel/signal_64.c
···
                 clts();
                 task_thread_info(current)->status |= TS_USEDFPU;
         }
-        return restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+        err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
+        if (unlikely(err)) {
+                /*
+                 * Encountered an error while doing the restore from the
+                 * user buffer, clear the fpu state.
+                 */
+                clear_fpu(tsk);
+                clear_used_math();
+        }
+        return err;
 }
 
 /*
+28 -24
arch/x86/kernel/smpboot.c
···
         flush_tlb_all();
         low_mappings = 1;
 
-#ifdef CONFIG_X86_PC
-        if (def_to_bigsmp && apicid > 8) {
-                printk(KERN_WARNING
-                        "More than 8 CPUs detected - skipping them.\n"
-                        "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
-                err = -1;
-        } else
-                err = do_boot_cpu(apicid, cpu);
-#else
         err = do_boot_cpu(apicid, cpu);
-#endif
 
         zap_low_mappings();
         low_mappings = 0;
···
 static int __init smp_sanity_check(unsigned max_cpus)
 {
         preempt_disable();
+
+#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
+        if (def_to_bigsmp && nr_cpu_ids > 8) {
+                unsigned int cpu;
+                unsigned nr;
+
+                printk(KERN_WARNING
+                       "More than 8 CPUs detected - skipping them.\n"
+                       "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n");
+
+                nr = 0;
+                for_each_present_cpu(cpu) {
+                        if (nr >= 8)
+                                cpu_clear(cpu, cpu_present_map);
+                        nr++;
+                }
+
+                nr = 0;
+                for_each_possible_cpu(cpu) {
+                        if (nr >= 8)
+                                cpu_clear(cpu, cpu_possible_map);
+                        nr++;
+                }
+
+                nr_cpu_ids = 8;
+        }
+#endif
+
         if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                 printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
                         "by the BIOS.\n", hard_smp_processor_id());
···
         BUG();
 }
 #endif
-
-/*
- * If the BIOS enumerates physical processors before logical,
- * maxcpus=N at enumeration-time can be used to disable HT.
- */
-static int __init parse_maxcpus(char *arg)
-{
-        extern unsigned int maxcpus;
-
-        if (arg)
-                maxcpus = simple_strtoul(arg, NULL, 0);
-        return 0;
-}
-early_param("maxcpus", parse_maxcpus);
+10 -7
arch/x86/kernel/smpcommon.c
···
 DEFINE_PER_CPU(unsigned long, this_cpu_off);
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
-/* Initialize the CPU's GDT.  This is either the boot CPU doing itself
-   (still using the master per-cpu area), or a CPU doing it for a
-   secondary which will soon come up. */
+/*
+ * Initialize the CPU's GDT.  This is either the boot CPU doing itself
+ * (still using the master per-cpu area), or a CPU doing it for a
+ * secondary which will soon come up.
+ */
 __cpuinit void init_gdt(int cpu)
 {
-        struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+        struct desc_struct gdt;
 
-        pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
-                        __per_cpu_offset[cpu], 0xFFFFF,
+        pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
                         0x2 | DESCTYPE_S, 0x8);
+        gdt.s = 1;
 
-        gdt[GDT_ENTRY_PERCPU].s = 1;
+        write_gdt_entry(get_cpu_gdt_table(cpu),
+                        GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
 
         per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
         per_cpu(cpu_number, cpu) = cpu;
+8 -1
arch/x86/kernel/traps_64.c
···
         }
 
         clts();                         /* Allow maths ops (or we recurse) */
-        restore_fpu_checking(&me->thread.xstate->fxsave);
+        /*
+         * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+         */
+        if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+                stts();
+                force_sig(SIGSEGV, me);
+                return;
+        }
         task_thread_info(me)->status |= TS_USEDFPU;
         me->fpu_counter++;
 }
+2 -4
arch/x86/kernel/visws_quirks.c
···
         return 1;
 }
 
-extern unsigned int __cpuinitdata maxcpus;
-
 /*
  * The Visual Workstation is Intel MP compliant in the hardware
  * sense, but it doesn't have a BIOS(-configuration table).
···
                 ncpus = CO_CPU_MAX;
         }
 
-        if (ncpus > maxcpus)
-                ncpus = maxcpus;
+        if (ncpus > setup_max_cpus)
+                ncpus = setup_max_cpus;
 
 #ifdef CONFIG_X86_LOCAL_APIC
         smp_found_config = 1;
+10 -2
arch/x86/mm/init_64.c
···
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
-int direct_gbpages __meminitdata
+int direct_gbpages
 #ifdef CONFIG_DIRECT_GBPAGES
                                 = 1
 #endif
···
 
 int after_bootmem;
 
-static __init void *spp_getpage(void)
+/*
+ * NOTE: This function is marked __ref because it calls __init function
+ * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
+ */
+static __ref void *spp_getpage(void)
 {
         void *ptr;
 
···
 {
         unsigned long pages = 0;
         unsigned long last_map_addr = end;
+        unsigned long start = address;
 
         int i = pmd_index(address);
 
···
                         if (!pmd_large(*pmd))
                                 last_map_addr = phys_pte_update(pmd, address,
                                                                  end);
+                        /* Count entries we're using from level2_ident_pgt */
+                        if (start == 0)
+                                pages++;
                         continue;
                 }
+1 -2
arch/x86/mm/pageattr-test.c
···
         failed += print_split(&sc);
 
         if (failed) {
-                printk(KERN_ERR "NOT PASSED. Please report.\n");
-                WARN_ON(1);
+                WARN(1, KERN_ERR "NOT PASSED. Please report.\n");
                 return -EINVAL;
         } else {
                 if (print)
+13 -8
arch/x86/mm/pageattr.c
···
 
 int arch_report_meminfo(char *page)
 {
-        int n = sprintf(page, "DirectMap4k:  %8lu\n"
-                        "DirectMap2M:  %8lu\n",
-                        direct_pages_count[PG_LEVEL_4K],
-                        direct_pages_count[PG_LEVEL_2M]);
+        int n = sprintf(page, "DirectMap4k:  %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_4K] << 2);
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+        n += sprintf(page + n, "DirectMap2M:  %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_2M] << 11);
+#else
+        n += sprintf(page + n, "DirectMap4M:  %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_2M] << 12);
+#endif
 #ifdef CONFIG_X86_64
-        n += sprintf(page + n, "DirectMap1G:  %8lu\n",
-                        direct_pages_count[PG_LEVEL_1G]);
+        if (direct_gbpages)
+                n += sprintf(page + n, "DirectMap1G:  %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_1G] << 20);
 #endif
         return n;
 }
···
         if (!pte_val(old_pte)) {
                 if (!primary)
                         return 0;
-                printk(KERN_WARNING "CPA: called for zero pte. "
+                WARN(1, KERN_WARNING "CPA: called for zero pte. "
                        "vaddr = %lx cpa->vaddr = %lx\n", address,
                        cpa->vaddr);
-                WARN_ON(1);
                 return -EINVAL;
         }
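The unit fix in /proc/meminfo is a straight shift per page size: a 4 KiB page is 4 kB (<< 2), a 2 MiB page is 2048 kB (<< 11), a 4 MiB page is 4096 kB (<< 12), and a 1 GiB page is 1048576 kB (<< 20). A stand-alone sketch of the same arithmetic, with made-up sample counts rather than kernel data:

#include <stdio.h>

/* Convert direct-mapping page counts to kB with the shifts used by the
 * patched arch_report_meminfo() above. */
int main(void)
{
        unsigned long map4k = 2048, map2m = 512, map1g = 1;  /* sample counts */

        printf("DirectMap4k: %8lu kB\n", map4k << 2);   /* pages * 4 kB */
        printf("DirectMap2M: %8lu kB\n", map2m << 11);  /* pages * 2048 kB */
        printf("DirectMap1G: %8lu kB\n", map1g << 20);  /* pages * 1048576 kB */
        return 0;
}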
+8 -4
arch/x86/mm/srat_32.c
···
  * start of the node, and that the current "end" address is after
  * the previous one.
  */
-static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
+static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk)
 {
         /*
          * Only add present memory as told by the e820.
···
         if (memory_chunk->start_pfn >= max_pfn) {
                 printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n",
                         memory_chunk->start_pfn, memory_chunk->end_pfn);
-                return;
+                return -1;
         }
         if (memory_chunk->nid != nid)
-                return;
+                return -1;
 
         if (!node_has_online_mem(nid))
                 node_start_pfn[nid] = memory_chunk->start_pfn;
···
 
         if (node_end_pfn[nid] < memory_chunk->end_pfn)
                 node_end_pfn[nid] = memory_chunk->end_pfn;
+
+        return 0;
 }
 
 int __init get_memcfg_from_srat(void)
···
                 printk(KERN_DEBUG
                         "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
                          j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
-                node_read_chunk(chunk->nid, chunk);
+                if (node_read_chunk(chunk->nid, chunk))
+                        continue;
+
                 e820_register_active_regions(chunk->nid, chunk->start_pfn,
                                              min(chunk->end_pfn, max_pfn));
         }
+1 -1
arch/x86/pci/mmconfig-shared.c
···
                 return;
 
 reject:
-        printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
+        printk(KERN_INFO "PCI: Not using MMCONFIG.\n");
         pci_mmcfg_arch_free();
         kfree(pci_mmcfg_config);
         pci_mmcfg_config = NULL;
+5 -3
include/asm-x86/amd_iommu_types.h
···
 #define ALIAS_TABLE_ENTRY_SIZE          2
 #define RLOOKUP_TABLE_ENTRY_SIZE        (sizeof(void *))
 
-/* helper macros */
-#define LOW_U32(x) ((x) & ((1ULL << 32)-1))
-
 /* Length of the MMIO region for the AMD IOMMU */
 #define MMIO_REGION_LENGTH       0x4000
 
···
 #define MMIO_EVT_TAIL_OFFSET    0x2018
 #define MMIO_STATUS_OFFSET      0x2020
 
+/* MMIO status bits */
+#define MMIO_STATUS_COM_WAIT_INT_MASK   0x04
+
 /* feature control bits */
 #define CONTROL_IOMMU_EN        0x00ULL
 #define CONTROL_HT_TUN_EN       0x01ULL
···
 #define CMD_INV_IOMMU_PAGES     0x03
 
 #define CMD_COMPL_WAIT_STORE_MASK       0x01
+#define CMD_COMPL_WAIT_INT_MASK         0x02
 #define CMD_INV_IOMMU_PAGES_SIZE_MASK   0x01
 #define CMD_INV_IOMMU_PAGES_PDE_MASK    0x02
 
···
 #define DEV_ENTRY_TRANSLATION   0x01
 #define DEV_ENTRY_IR            0x3d
 #define DEV_ENTRY_IW            0x3e
+#define DEV_ENTRY_NO_PAGE_FAULT 0x62
 #define DEV_ENTRY_EX            0x67
 #define DEV_ENTRY_SYSMGT1       0x68
 #define DEV_ENTRY_SYSMGT2       0x69
+2 -1
include/asm-x86/geode.h
···
 #define MSR_PIC_YSEL_HIGH       0x51400021
 #define MSR_PIC_ZSEL_LOW        0x51400022
 #define MSR_PIC_ZSEL_HIGH       0x51400023
+#define MSR_PIC_IRQM_LPC        0x51400025
 
 #define MSR_MFGPT_IRQ           0x51400028
 #define MSR_MFGPT_NR            0x51400029
···
 }
 
 extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable);
-extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable);
+extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable);
 extern int geode_mfgpt_alloc_timer(int timer, int domain);
 
 #define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1)
-2
include/asm-x86/i387.h
···
 #else
                      : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
 #endif
-        if (unlikely(err))
-                init_fpu(current);
         return err;
 }
+9 -9
include/asm-x86/io.h
···
 
 #define build_mmio_read(name, size, type, reg, barrier) \
 static inline type name(const volatile void __iomem *addr) \
-{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \
+{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
 :"m" (*(volatile type __force *)addr) barrier); return ret; }
 
 #define build_mmio_write(name, size, type, reg, barrier) \
···
 { asm volatile("mov" size " %0,%1": :reg (val), \
 "m" (*(volatile type __force *)addr) barrier); }
 
-build_mmio_read(readb, "b", unsigned char, "q", :"memory")
-build_mmio_read(readw, "w", unsigned short, "r", :"memory")
-build_mmio_read(readl, "l", unsigned int, "r", :"memory")
+build_mmio_read(readb, "b", unsigned char, "=q", :"memory")
+build_mmio_read(readw, "w", unsigned short, "=r", :"memory")
+build_mmio_read(readl, "l", unsigned int, "=r", :"memory")
 
-build_mmio_read(__readb, "b", unsigned char, "q", )
-build_mmio_read(__readw, "w", unsigned short, "r", )
-build_mmio_read(__readl, "l", unsigned int, "r", )
+build_mmio_read(__readb, "b", unsigned char, "=q", )
+build_mmio_read(__readw, "w", unsigned short, "=r", )
+build_mmio_read(__readl, "l", unsigned int, "=r", )
 
 build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
 build_mmio_write(writew, "w", unsigned short, "r", :"memory")
···
 #define mmiowb() barrier()
 
 #ifdef CONFIG_X86_64
-build_mmio_read(readq, "q", unsigned long, "r", :"memory")
-build_mmio_read(__readq, "q", unsigned long, "r", )
+build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
+build_mmio_read(__readq, "q", unsigned long, "=r", )
 build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
 build_mmio_write(__writeq, "q", unsigned long, "r", )
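With the '=' folded into the constraint argument, each generated accessor carries one ordinary output-constraint string; the previous '"=" reg' form split the constraint across two adjacent string literals, which the commit above says gcc-3.2.3 rejects. As an illustration (a hand-written expansion within the kernel's own header environment, not preprocessor output), readb() now expands to roughly:

static inline unsigned char readb(const volatile void __iomem *addr)
{
        unsigned char ret;

        /* "=q": byte-register output, "m": the MMIO location; the "memory"
         * clobber keeps the compiler from caching values across the access. */
        asm volatile("movb %1,%0"
                     : "=q" (ret)
                     : "m" (*(volatile unsigned char __force *)addr)
                     : "memory");
        return ret;
}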
+6
include/asm-x86/mmzone_32.h
···
         reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags))
 #define alloc_bootmem(x) \
         __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_nopanic(x) \
+        __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \
+                                __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
         __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
         __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_pages_nopanic(x) \
+        __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \
+                                __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
         __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
 #define alloc_bootmem_node(pgdat, x) \
+1 -1
include/asm-x86/pgtable_64.h
···
 #define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
 #define VMEMMAP_START    _AC(0xffffe20000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
-#define MODULES_END      _AC(0xfffffffffff00000, UL)
+#define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 
 #ifndef __ASSEMBLY__
+23
include/asm-x86/processor.h
···
 extern unsigned long idle_halt;
 extern unsigned long idle_nomwait;
 
+/*
+ * on systems with caches, caches must be flashed as the absolute
+ * last instruction before going into a suspended halt.  Otherwise,
+ * dirty data can linger in the cache and become stale on resume,
+ * leading to strange errors.
+ *
+ * perform a variety of operations to guarantee that the compiler
+ * will not reorder instructions.  wbinvd itself is serializing
+ * so the processor will not reorder.
+ *
+ * Systems without cache can just go into halt.
+ */
+static inline void wbinvd_halt(void)
+{
+        mb();
+        /* check for clflush to determine if wbinvd is legal */
+        if (cpu_has_clflush)
+                asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
+        else
+                while (1)
+                        halt();
+}
+
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
+2 -2
include/asm-x86/spinlock.h
···
 {
         int tmp = ACCESS_ONCE(lock->slock);
 
-        return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1;
+        return (((tmp >> 8) - tmp) & 0xff) > 1;
 }
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
···
 {
         int tmp = ACCESS_ONCE(lock->slock);
 
-        return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1;
+        return (((tmp >> 16) - tmp) & 0xffff) > 1;
 }
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
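The spin_is_contended() fix is easiest to see at the ticket wrap-around: in the 8-bit variant the low byte is the ticket currently being served and the high byte is the next ticket to hand out, so subtracting before masking keeps the difference correct modulo 256. A stand-alone user-space sketch with values chosen to straddle the wrap (not kernel code):

#include <stdio.h>

/* Old check: mask each byte first, then subtract - breaks across the wrap. */
static int contended_old(int slock)
{
        return (((slock >> 8) & 0xff) - (slock & 0xff)) > 1;
}

/* New check: subtract first, then mask - difference stays modulo 256. */
static int contended_new(int slock)
{
        return (((slock >> 8) - slock) & 0xff) > 1;
}

int main(void)
{
        /* Owner ticket 0xfe, next ticket wrapped around to 0x01: CPUs are queued. */
        int slock = (0x01 << 8) | 0xfe;

        printf("old: %d, new: %d\n", contended_old(slock), contended_new(slock));
        /* prints "old: 0, new: 1" - only the new form sees the contention */
        return 0;
}

The 16-bit variant (used when NR_CPUS is large) is the same argument with a 0xffff mask.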