Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
"Hopefully the last round of fixes for 3.19

- regression fix for the LDT changes
- regression fix for XEN interrupt handling caused by the APIC
changes
- regression fixes for the PAT changes
- last minute fixes for the new MPX support
- regression fix for 32bit UP
- fix for a long standing relocation issue on 64bit tagged for stable
- functional fix for the Hyper-V clocksource tagged for stable
- downgrade of a pr_err which tends to confuse users

Looks a bit on the large side, but almost half of it is valuable
comments"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/tsc: Change Fast TSC calibration failed from error to info
x86/apic: Re-enable PCI_MSI support for non-SMP X86_32
x86, mm: Change cachemode exports to non-gpl
x86, tls: Interpret an all-zero struct user_desc as "no segment"
x86, tls, ldt: Stop checking lm in LDT_empty
x86, mpx: Strictly enforce empty prctl() args
x86, mpx: Fix potential performance issue on unmaps
x86, mpx: Explicitly disable 32-bit MPX support on 64-bit kernels
x86, hyperv: Mark the Hyper-V clocksource as being continuous
x86: Don't rely on VMWare emulating PAT MSR correctly
x86, irq: Properly tag virtualization entry in /proc/interrupts
x86, boot: Skip relocs when load address unchanged
x86/xen: Override ACPI IRQ management callback __acpi_unregister_gsi
ACPI: pci: Do not clear pci_dev->irq in acpi_pci_irq_disable()
x86/xen: Treat SCI interrupt as normal GSI interrupt

Changed files
+105 -76
arch
x86
boot
compressed
include
kernel
mm
pci
drivers
acpi
kernel
+5 -1
arch/x86/Kconfig
··· 857 857 858 858 config X86_UP_APIC 859 859 bool "Local APIC support on uniprocessors" 860 - depends on X86_32 && !SMP && !X86_32_NON_STANDARD && !PCI_MSI 860 + depends on X86_32 && !SMP && !X86_32_NON_STANDARD 861 861 ---help--- 862 862 A local APIC (Advanced Programmable Interrupt Controller) is an 863 863 integrated interrupt controller in the CPU. If you have a single-CPU ··· 867 867 all. The local APIC supports CPU-generated self-interrupts (timer, 868 868 performance counters), and the NMI watchdog which detects hard 869 869 lockups. 870 + 871 + config X86_UP_APIC_MSI 872 + def_bool y 873 + select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI 870 874 871 875 config X86_UP_IOAPIC 872 876 bool "IO-APIC support on uniprocessors"
+8 -1
arch/x86/boot/compressed/misc.c
··· 373 373 unsigned long output_len, 374 374 unsigned long run_size) 375 375 { 376 + unsigned char *output_orig = output; 377 + 376 378 real_mode = rmode; 377 379 378 380 sanitize_boot_params(real_mode); ··· 423 421 debug_putstr("\nDecompressing Linux... "); 424 422 decompress(input_data, input_len, NULL, NULL, output, NULL, error); 425 423 parse_elf(output); 426 - handle_relocations(output, output_len); 424 + /* 425 + * 32-bit always performs relocations. 64-bit relocations are only 426 + * needed if kASLR has chosen a different load address. 427 + */ 428 + if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) 429 + handle_relocations(output, output_len); 427 430 debug_putstr("done.\nBooting the kernel.\n"); 428 431 return output; 429 432 }
+1
arch/x86/include/asm/acpi.h
··· 50 50 51 51 extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi, 52 52 int trigger, int polarity); 53 + extern void (*__acpi_unregister_gsi)(u32 gsi); 53 54 54 55 static inline void disable_acpi(void) 55 56 {
+14 -6
arch/x86/include/asm/desc.h
··· 251 251 gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; 252 252 } 253 253 254 - #define _LDT_empty(info) \ 254 + /* This intentionally ignores lm, since 32-bit apps don't have that field. */ 255 + #define LDT_empty(info) \ 255 256 ((info)->base_addr == 0 && \ 256 257 (info)->limit == 0 && \ 257 258 (info)->contents == 0 && \ ··· 262 261 (info)->seg_not_present == 1 && \ 263 262 (info)->useable == 0) 264 263 265 - #ifdef CONFIG_X86_64 266 - #define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0)) 267 - #else 268 - #define LDT_empty(info) (_LDT_empty(info)) 269 - #endif 264 + /* Lots of programs expect an all-zero user_desc to mean "no segment at all". */ 265 + static inline bool LDT_zero(const struct user_desc *info) 266 + { 267 + return (info->base_addr == 0 && 268 + info->limit == 0 && 269 + info->contents == 0 && 270 + info->read_exec_only == 0 && 271 + info->seg_32bit == 0 && 272 + info->limit_in_pages == 0 && 273 + info->seg_not_present == 0 && 274 + info->useable == 0); 275 + } 270 276 271 277 static inline void clear_LDT(void) 272 278 {
+19 -1
arch/x86/include/asm/mmu_context.h
··· 130 130 static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, 131 131 unsigned long start, unsigned long end) 132 132 { 133 - mpx_notify_unmap(mm, vma, start, end); 133 + /* 134 + * mpx_notify_unmap() goes and reads a rarely-hot 135 + * cacheline in the mm_struct. That can be expensive 136 + * enough to be seen in profiles. 137 + * 138 + * The mpx_notify_unmap() call and its contents have been 139 + * observed to affect munmap() performance on hardware 140 + * where MPX is not present. 141 + * 142 + * The unlikely() optimizes for the fast case: no MPX 143 + * in the CPU, or no MPX use in the process. Even if 144 + * we get this wrong (in the unlikely event that MPX 145 + * is widely enabled on some system) the overhead of 146 + * MPX itself (reading bounds tables) is expected to 147 + * overwhelm the overhead of getting this unlikely() 148 + * consistently wrong. 149 + */ 150 + if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX))) 151 + mpx_notify_unmap(mm, vma, start, end); 134 152 } 135 153 136 154 #endif /* _ASM_X86_MMU_CONTEXT_H */
+12 -12
arch/x86/kernel/acpi/boot.c
··· 611 611 612 612 int acpi_gsi_to_irq(u32 gsi, unsigned int *irqp) 613 613 { 614 - int irq; 614 + int rc, irq, trigger, polarity; 615 615 616 - if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { 617 - *irqp = gsi; 618 - } else { 619 - mutex_lock(&acpi_ioapic_lock); 620 - irq = mp_map_gsi_to_irq(gsi, 621 - IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK); 622 - mutex_unlock(&acpi_ioapic_lock); 623 - if (irq < 0) 624 - return -1; 625 - *irqp = irq; 616 + rc = acpi_get_override_irq(gsi, &trigger, &polarity); 617 + if (rc == 0) { 618 + trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; 619 + polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; 620 + irq = acpi_register_gsi(NULL, gsi, trigger, polarity); 621 + if (irq >= 0) { 622 + *irqp = irq; 623 + return 0; 624 + } 626 625 } 627 - return 0; 626 + 627 + return -1; 628 628 } 629 629 EXPORT_SYMBOL_GPL(acpi_gsi_to_irq); 630 630
+1
arch/x86/kernel/cpu/mshyperv.c
··· 107 107 .rating = 400, /* use this when running on Hyperv*/ 108 108 .read = read_hv_clock, 109 109 .mask = CLOCKSOURCE_MASK(64), 110 + .flags = CLOCK_SOURCE_IS_CONTINUOUS, 110 111 }; 111 112 112 113 static void __init ms_hyperv_init_platform(void)
+1 -1
arch/x86/kernel/irq.c
··· 127 127 seq_puts(p, " Machine check polls\n"); 128 128 #endif 129 129 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) 130 - seq_printf(p, "%*s: ", prec, "THR"); 130 + seq_printf(p, "%*s: ", prec, "HYP"); 131 131 for_each_online_cpu(j) 132 132 seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); 133 133 seq_puts(p, " Hypervisor callback interrupts\n");
+23 -2
arch/x86/kernel/tls.c
··· 29 29 30 30 static bool tls_desc_okay(const struct user_desc *info) 31 31 { 32 - if (LDT_empty(info)) 32 + /* 33 + * For historical reasons (i.e. no one ever documented how any 34 + * of the segmentation APIs work), user programs can and do 35 + * assume that a struct user_desc that's all zeros except for 36 + * entry_number means "no segment at all". This never actually 37 + * worked. In fact, up to Linux 3.19, a struct user_desc like 38 + * this would create a 16-bit read-write segment with base and 39 + * limit both equal to zero. 40 + * 41 + * That was close enough to "no segment at all" until we 42 + * hardened this function to disallow 16-bit TLS segments. Fix 43 + * it up by interpreting these zeroed segments the way that they 44 + * were almost certainly intended to be interpreted. 45 + * 46 + * The correct way to ask for "no segment at all" is to specify 47 + * a user_desc that satisfies LDT_empty. To keep everything 48 + * working, we accept both. 49 + * 50 + * Note that there's a similar kludge in modify_ldt -- look at 51 + * the distinction between modes 1 and 0x11. 52 + */ 53 + if (LDT_empty(info) || LDT_zero(info)) 33 54 return true; 34 55 35 56 /* ··· 92 71 cpu = get_cpu(); 93 72 94 73 while (n-- > 0) { 95 - if (LDT_empty(info)) 74 + if (LDT_empty(info) || LDT_zero(info)) 96 75 desc->a = desc->b = 0; 97 76 else 98 77 fill_ldt(desc, info);
+1 -1
arch/x86/kernel/tsc.c
··· 617 617 goto success; 618 618 } 619 619 } 620 - pr_err("Fast TSC calibration failed\n"); 620 + pr_info("Fast TSC calibration failed\n"); 621 621 return 0; 622 622 623 623 success:
+2 -2
arch/x86/mm/init.c
··· 43 43 [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, 44 44 [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, 45 45 }; 46 - EXPORT_SYMBOL_GPL(__cachemode2pte_tbl); 46 + EXPORT_SYMBOL(__cachemode2pte_tbl); 47 47 uint8_t __pte2cachemode_tbl[8] = { 48 48 [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, 49 49 [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, ··· 54 54 [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, 55 55 [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, 56 56 }; 57 - EXPORT_SYMBOL_GPL(__pte2cachemode_tbl); 57 + EXPORT_SYMBOL(__pte2cachemode_tbl); 58 58 59 59 static unsigned long __initdata pgt_buf_start; 60 60 static unsigned long __initdata pgt_buf_end;
+6
arch/x86/mm/mpx.c
··· 349 349 return MPX_INVALID_BOUNDS_DIR; 350 350 351 351 /* 352 + * 32-bit binaries on 64-bit kernels are currently 353 + * unsupported. 354 + */ 355 + if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32)) 356 + return MPX_INVALID_BOUNDS_DIR; 357 + /* 352 358 * The bounds directory pointer is stored in a register 353 359 * only accessible if we first do an xsave. 354 360 */
+6 -1
arch/x86/mm/pat.c
··· 234 234 PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC); 235 235 236 236 /* Boot CPU check */ 237 - if (!boot_pat_state) 237 + if (!boot_pat_state) { 238 238 rdmsrl(MSR_IA32_CR_PAT, boot_pat_state); 239 + if (!boot_pat_state) { 240 + pat_disable("PAT read returns always zero, disabled."); 241 + return; 242 + } 243 + } 239 244 240 245 wrmsrl(MSR_IA32_CR_PAT, pat); 241 246
+2 -47
arch/x86/pci/xen.c
··· 458 458 * just how GSIs get registered. 459 459 */ 460 460 __acpi_register_gsi = acpi_register_gsi_xen_hvm; 461 + __acpi_unregister_gsi = NULL; 461 462 #endif 462 463 463 464 #ifdef CONFIG_PCI_MSI ··· 472 471 } 473 472 474 473 #ifdef CONFIG_XEN_DOM0 475 - static __init void xen_setup_acpi_sci(void) 476 - { 477 - int rc; 478 - int trigger, polarity; 479 - int gsi = acpi_sci_override_gsi; 480 - int irq = -1; 481 - int gsi_override = -1; 482 - 483 - if (!gsi) 484 - return; 485 - 486 - rc = acpi_get_override_irq(gsi, &trigger, &polarity); 487 - if (rc) { 488 - printk(KERN_WARNING "xen: acpi_get_override_irq failed for acpi" 489 - " sci, rc=%d\n", rc); 490 - return; 491 - } 492 - trigger = trigger ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; 493 - polarity = polarity ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; 494 - 495 - printk(KERN_INFO "xen: sci override: global_irq=%d trigger=%d " 496 - "polarity=%d\n", gsi, trigger, polarity); 497 - 498 - /* Before we bind the GSI to a Linux IRQ, check whether 499 - * we need to override it with bus_irq (IRQ) value. Usually for 500 - * IRQs below IRQ_LEGACY_IRQ this holds IRQ == GSI, as so: 501 - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 low level) 502 - * but there are oddballs where the IRQ != GSI: 503 - * ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 20 low level) 504 - * which ends up being: gsi_to_irq[9] == 20 505 - * (which is what acpi_gsi_to_irq ends up calling when starting the 506 - * the ACPI interpreter and keels over since IRQ 9 has not been 507 - * setup as we had setup IRQ 20 for it). 508 - */ 509 - if (acpi_gsi_to_irq(gsi, &irq) == 0) { 510 - /* Use the provided value if it's valid. 
*/ 511 - if (irq >= 0) 512 - gsi_override = irq; 513 - } 514 - 515 - gsi = xen_register_gsi(gsi, gsi_override, trigger, polarity); 516 - printk(KERN_INFO "xen: acpi sci %d\n", gsi); 517 - 518 - return; 519 - } 520 - 521 474 int __init pci_xen_initial_domain(void) 522 475 { 523 476 int irq; ··· 482 527 x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs; 483 528 pci_msi_ignore_mask = 1; 484 529 #endif 485 - xen_setup_acpi_sci(); 486 530 __acpi_register_gsi = acpi_register_gsi_xen; 531 + __acpi_unregister_gsi = NULL; 487 532 /* Pre-allocate legacy irqs */ 488 533 for (irq = 0; irq < nr_legacy_irqs(); irq++) { 489 534 int trigger, polarity;
-1
drivers/acpi/pci_irq.c
··· 512 512 dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin)); 513 513 if (gsi >= 0) { 514 514 acpi_unregister_gsi(gsi); 515 - dev->irq = 0; 516 515 dev->irq_managed = 0; 517 516 } 518 517 }
+4
kernel/sys.c
··· 2210 2210 up_write(&me->mm->mmap_sem); 2211 2211 break; 2212 2212 case PR_MPX_ENABLE_MANAGEMENT: 2213 + if (arg2 || arg3 || arg4 || arg5) 2214 + return -EINVAL; 2213 2215 error = MPX_ENABLE_MANAGEMENT(me); 2214 2216 break; 2215 2217 case PR_MPX_DISABLE_MANAGEMENT: 2218 + if (arg2 || arg3 || arg4 || arg5) 2219 + return -EINVAL; 2216 2220 error = MPX_DISABLE_MANAGEMENT(me); 2217 2221 break; 2218 2222 default: