Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 and PTI fixes from Ingo Molnar:
"Misc fixes:

- fix EFI pagetables freeing

- fix vsyscall pagetable setting on Xen PV guests

- remove the ancient CONFIG_X86_PPRO_FENCE=y quirk - x86 is TSO again

- fix two incompatibilities with development versions of binutils (ld)

- clean up breakpoint handling

- fix an x86 self-test"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/entry/64: Don't use IST entry for #BP stack
x86/efi: Free efi_pgd with free_pages()
x86/vsyscall/64: Use proper accessor to update P4D entry
x86/cpu: Remove the CONFIG_X86_PPRO_FENCE=y quirk
x86/boot/64: Verify alignment of the LOAD segment
x86/build/64: Force the linker to use 2MB page size
selftests/x86/ptrace_syscall: Fix for yet more glibc interference

Changed files:

 arch/x86/Kconfig.cpu                         | 13 -
 arch/x86/Makefile                            |  9 +
 arch/x86/boot/compressed/misc.c              |  4 +
 arch/x86/entry/entry_64.S                    |  2 +-
 arch/x86/entry/vdso/vdso32/vclock_gettime.c  |  2 -
 arch/x86/entry/vsyscall/vsyscall_64.c        |  2 +-
 arch/x86/include/asm/barrier.h               | 30 -
 arch/x86/include/asm/io.h                    | 15 -
 arch/x86/kernel/idt.c                        |  2 -
 arch/x86/kernel/pci-nommu.c                  | 19 -
 arch/x86/kernel/traps.c                      | 15 +-
 arch/x86/platform/efi/efi_64.c               |  2 +-
 arch/x86/um/asm/barrier.h                    |  4 -
 tools/testing/selftests/x86/ptrace_syscall.c |  8 +-
 14 files changed, 30 insertions(+), 97 deletions(-)
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -315,19 +315,6 @@
 	default "4" if MELAN || M486 || MGEODEGX1
 	default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
-config X86_PPRO_FENCE
-	bool "PentiumPro memory ordering errata workaround"
-	depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
-	---help---
-	  Old PentiumPro multiprocessor systems had errata that could cause
-	  memory operations to violate the x86 ordering standard in rare cases.
-	  Enabling this option will attempt to work around some (but not all)
-	  occurrences of this problem, at the cost of much heavier spinlock and
-	  memory barrier operations.
-
-	  If unsure, say n here. Even distro kernels should think twice before
-	  enabling this: there are few systems, and an unlikely bug.
-
 config X86_F00F_BUG
 	def_bool y
 	depends on M586MMX || M586TSC || M586 || M486
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -223,6 +223,15 @@
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
+#
+# The 64-bit kernel must be aligned to 2MB. Pass -z max-page-size=0x200000 to
+# the linker to force 2MB page size regardless of the default page size used
+# by the linker.
+#
+ifdef CONFIG_X86_64
+LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
+endif
+
 # Speed up the build
 KBUILD_CFLAGS += -pipe
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -309,6 +309,10 @@
 
 		switch (phdr->p_type) {
 		case PT_LOAD:
+#ifdef CONFIG_X86_64
+			if ((phdr->p_align % 0x200000) != 0)
+				error("Alignment of LOAD segment isn't multiple of 2MB");
+#endif
 #ifdef CONFIG_RELOCATABLE
 			dest = output;
 			dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
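The new p_align test can also be run offline against any ELF image. Below is a
minimal userspace sketch (not kernel code; it assumes a 64-bit ELF in host byte
order and keeps error handling minimal) that flags any PT_LOAD segment whose
alignment the decompressor would now reject:

#include <elf.h>
#include <stdio.h>

/* Walk the program headers and report any PT_LOAD segment whose
 * alignment is not a multiple of 2MB. */
int main(int argc, char **argv)
{
	Elf64_Ehdr ehdr;
	Elf64_Phdr phdr;
	FILE *f;
	int i;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <elf-file>\n", argv[0]);
		return 1;
	}
	f = fopen(argv[1], "rb");
	if (!f || fread(&ehdr, sizeof(ehdr), 1, f) != 1)
		return 1;

	for (i = 0; i < ehdr.e_phnum; i++) {
		fseek(f, ehdr.e_phoff + i * sizeof(phdr), SEEK_SET);
		if (fread(&phdr, sizeof(phdr), 1, f) != 1)
			break;
		if (phdr.p_type == PT_LOAD && (phdr.p_align % 0x200000) != 0)
			printf("LOAD segment %d: p_align 0x%llx is not a multiple of 2MB\n",
			       i, (unsigned long long)phdr.p_align);
	}
	fclose(f);
	return 0;
}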
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1138,7 +1138,7 @@
 #endif /* CONFIG_HYPERV */
 
 idtentry debug			do_debug		has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3			do_int3			has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3			do_int3			has_error_code=0
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
 #undef CONFIG_OPTIMIZE_INLINING
 #endif
 
-#undef CONFIG_X86_PPRO_FENCE
-
 #ifdef CONFIG_X86_64
 
 /*
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -347,7 +347,7 @@
 	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
 	p4d = p4d_offset(pgd, VSYSCALL_ADDR);
 #if CONFIG_PGTABLE_LEVELS >= 5
-	p4d->p4d |= _PAGE_USER;
+	set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));
 #endif
 	pud = pud_offset(p4d, VSYSCALL_ADDR);
 	set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -52,11 +52,7 @@
 #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
 					   "lfence", X86_FEATURE_LFENCE_RDTSC)
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()	rmb()
-#else
 #define dma_rmb()	barrier()
-#endif
 #define dma_wmb()	barrier()
 
 #ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@
 #define __smp_wmb()	barrier()
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v)			\
-do {							\
-	compiletime_assert_atomic_type(*p);		\
-	__smp_mb();					\
-	WRITE_ONCE(*p, v);				\
-} while (0)
-
-#define __smp_load_acquire(p)				\
-({							\
-	typeof(*p) ___p1 = READ_ONCE(*p);		\
-	compiletime_assert_atomic_type(*p);		\
-	__smp_mb();					\
-	___p1;						\
-})
-
-#else /* regular x86 TSO memory ordering */
-
 #define __smp_store_release(p, v)			\
 do {							\
 	compiletime_assert_atomic_type(*p);		\
@@ -106,8 +78,6 @@
 	barrier();					\
 	___p1;						\
 })
-
-#endif
 
 /* Atomic operations are already serializing on x86 */
 #define __smp_mb__before_atomic()	barrier()
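The surviving definitions lean on x86's TSO memory model: a release store and
an acquire load need only compiler barriers around plain loads and stores. As
a userspace analogue (illustrative only -- C11 atomics rather than the kernel
macros), here is the message-passing pattern these primitives exist for:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static int payload;        /* plain data, published via the flag */
static atomic_int ready;   /* release/acquire flag */

static void *producer(void *arg)
{
	payload = 42;                                            /* write data */
	atomic_store_explicit(&ready, 1, memory_order_release); /* publish it */
	return NULL;
}

static void *consumer(void *arg)
{
	while (!atomic_load_explicit(&ready, memory_order_acquire))
		;                                  /* spin until published */
	printf("payload = %d\n", payload);         /* guaranteed to be 42 */
	return NULL;
}

int main(void)
{
	pthread_t p, c;

	pthread_create(&p, NULL, producer, NULL);
	pthread_create(&c, NULL, consumer, NULL);
	pthread_join(p, NULL);
	pthread_join(c, NULL);
	return 0;
}

On x86, both the release store and the acquire load compile to ordinary MOV
instructions; only the compiler is constrained.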
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -232,21 +232,6 @@
  */
 #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
 
-/*
- * Cache management
- *
- * This needed for two cases
- * 1. Out of order aware processors
- * 2. Accidentally out of order processors (PPro errata #51)
- */
-
-static inline void flush_write_buffers(void)
-{
-#if defined(CONFIG_X86_PPRO_FENCE)
-	asm volatile("lock; addl $0,0(%%esp)": : :"memory");
-#endif
-}
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -160,7 +160,6 @@
  */
 static const __initconst struct idt_data dbg_idts[] = {
 	INTG(X86_TRAP_DB,	debug),
-	INTG(X86_TRAP_BP,	int3),
 };
 #endif
 
@@ -183,7 +182,6 @@
 static const __initconst struct idt_data ist_idts[] = {
 	ISTG(X86_TRAP_DB,	debug,		DEBUG_STACK),
 	ISTG(X86_TRAP_NMI,	nmi,		NMI_STACK),
-	SISTG(X86_TRAP_BP,	int3,		DEBUG_STACK),
 	ISTG(X86_TRAP_DF,	double_fault,	DOUBLEFAULT_STACK),
 #ifdef CONFIG_X86_MCE
 	ISTG(X86_TRAP_MC,	&machine_check,	MCE_STACK),
--- a/arch/x86/kernel/pci-nommu.c
+++ b/arch/x86/kernel/pci-nommu.c
@@ -37,7 +37,6 @@
 	WARN_ON(size == 0);
 	if (!check_addr("map_single", dev, bus, size))
 		return NOMMU_MAPPING_ERROR;
-	flush_write_buffers();
 	return bus;
 }
 
@@ -72,21 +71,5 @@
 			return 0;
 		s->dma_length = s->length;
 	}
-	flush_write_buffers();
 	return nents;
-}
-
-static void nommu_sync_single_for_device(struct device *dev,
-			dma_addr_t addr, size_t size,
-			enum dma_data_direction dir)
-{
-	flush_write_buffers();
-}
-
-
-static void nommu_sync_sg_for_device(struct device *dev,
-			struct scatterlist *sg, int nelems,
-			enum dma_data_direction dir)
-{
-	flush_write_buffers();
 }
 
@@ -101,8 +84,6 @@
 	.free			= dma_generic_free_coherent,
 	.map_sg			= nommu_map_sg,
 	.map_page		= nommu_map_page,
-	.sync_single_for_device = nommu_sync_single_for_device,
-	.sync_sg_for_device	= nommu_sync_sg_for_device,
 	.is_phys		= 1,
 	.mapping_error		= nommu_mapping_error,
 	.dma_supported		= x86_dma_supported,
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -577,7 +577,6 @@
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
-/* May run on IST stack. */
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -592,6 +591,13 @@
 	if (poke_int3_handler(regs))
 		return;
 
+	/*
+	 * Use ist_enter despite the fact that we don't use an IST stack.
+	 * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
+	 * mode or even during context tracking state changes.
+	 *
+	 * This means that we can't schedule. That's okay.
+	 */
 	ist_enter(regs);
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
@@ -609,15 +615,10 @@
 			SIGTRAP) == NOTIFY_STOP)
 		goto exit;
 
-	/*
-	 * Let others (NMI) know that the debug stack is in use
-	 * as we may switch to the interrupt stack.
-	 */
-	debug_stack_usage_inc();
 	cond_local_irq_enable(regs);
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
 	cond_local_irq_disable(regs);
-	debug_stack_usage_dec();
+
 exit:
 	ist_exit(regs);
 }
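#BP is a trap: the saved instruction pointer already points past the int3, so
the handler can simply return, and nothing here requires the IST machinery. An
x86-only userspace illustration (not kernel code) of those semantics:

#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void on_trap(int sig)
{
	/* int3 is a trap: the saved IP already points past the
	 * instruction, so returning resumes execution normally. */
	static const char msg[] = "caught SIGTRAP from int3\n";

	(void)sig;
	write(STDOUT_FILENO, msg, sizeof(msg) - 1);
}

int main(void)
{
	signal(SIGTRAP, on_trap);
	asm volatile("int3");	/* #BP -> kernel do_int3() -> SIGTRAP */
	puts("resumed after the breakpoint");
	return 0;
}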
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -227,7 +227,7 @@
 	if (!pud) {
 		if (CONFIG_PGTABLE_LEVELS > 4)
 			free_page((unsigned long) pgd_page_vaddr(*pgd));
-		free_page((unsigned long)efi_pgd);
+		free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
 		return -ENOMEM;
 	}
 
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -30,11 +30,7 @@
 
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()	rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
 #define dma_rmb()	barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
 #define dma_wmb()	barrier()
 
 #include <asm-generic/barrier.h>
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -183,8 +183,10 @@
 	if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
 		err(1, "PTRACE_TRACEME");
 
+	pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
 	printf("\tChild will make one syscall\n");
-	raise(SIGSTOP);
+	syscall(SYS_tgkill, pid, tid, SIGSTOP);
 
 	syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
 	_exit(0);
@@ -301,9 +303,11 @@
 	if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
 		err(1, "PTRACE_TRACEME");
 
+	pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
 	printf("\tChild will take a nap until signaled\n");
 	setsigign(SIGUSR1, SA_RESTART);
-	raise(SIGSTOP);
+	syscall(SYS_tgkill, pid, tid, SIGSTOP);
 
 	syscall(SYS_pause, 0, 0, 0, 0, 0, 0);
 	_exit(0);
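For reference: newer glibc turns raise(3) into more than one syscall, and
these tests depend on the tracer seeing an exact syscall sequence from the
child, so the test now issues tgkill(2) directly. A standalone sketch of the
idiom (stop_self is an illustrative name, not part of the test):

#define _GNU_SOURCE
#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Deliver SIGSTOP to the calling thread with exactly one syscall,
 * avoiding whatever bookkeeping a libc raise() may add. */
static void stop_self(void)
{
	pid_t pid = getpid();
	pid_t tid = syscall(SYS_gettid);

	syscall(SYS_tgkill, pid, tid, SIGSTOP);
}

int main(void)
{
	stop_self();	/* the process now stops until it receives SIGCONT */
	return 0;
}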