Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'x86/vdso' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into next

Pull x86 vdso updates from Peter Anvin:
"Vdso cleanups and improvements largely from Andy Lutomirski. This
makes the vdso a lot less 'special'"

* 'x86/vdso' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/vdso, build: Make LE access macros clearer, host-safe
x86/vdso, build: Fix cross-compilation from big-endian architectures
x86/vdso, build: When vdso2c fails, unlink the output
x86, vdso: Fix an OOPS accessing the HPET mapping w/o an HPET
x86, mm: Replace arch_vma_name with vm_ops->name for vsyscalls
x86, mm: Improve _install_special_mapping and fix x86 vdso naming
mm, fs: Add vm_ops->name as an alternative to arch_vma_name
x86, vdso: Fix an OOPS accessing the HPET mapping w/o an HPET
x86, vdso: Remove vestiges of VDSO_PRELINK and some outdated comments
x86, vdso: Move the vvar and hpet mappings next to the 64-bit vDSO
x86, vdso: Move the 32-bit vdso special pages after the text
x86, vdso: Reimplement vdso.so preparation in build-time C
x86, vdso: Move syscall and sysenter setup into kernel/cpu/common.c
x86, vdso: Clean up 32-bit vs 64-bit vdso params
x86, mm: Ensure correct alignment of the fixmap

+798 -612
+4 -4
arch/x86/ia32/ia32_signal.c
··· 383 383 } else { 384 384 /* Return stub is in 32bit vsyscall page */ 385 385 if (current->mm->context.vdso) 386 - restorer = VDSO32_SYMBOL(current->mm->context.vdso, 387 - sigreturn); 386 + restorer = current->mm->context.vdso + 387 + selected_vdso32->sym___kernel_sigreturn; 388 388 else 389 389 restorer = &frame->retcode; 390 390 } ··· 462 462 if (ksig->ka.sa.sa_flags & SA_RESTORER) 463 463 restorer = ksig->ka.sa.sa_restorer; 464 464 else 465 - restorer = VDSO32_SYMBOL(current->mm->context.vdso, 466 - rt_sigreturn); 465 + restorer = current->mm->context.vdso + 466 + selected_vdso32->sym___kernel_rt_sigreturn; 467 467 put_user_ex(ptr_to_compat(restorer), &frame->pretcode); 468 468 469 469 /*
+20 -15
arch/x86/include/asm/elf.h
··· 75 75 76 76 #include <asm/vdso.h> 77 77 78 - extern unsigned int vdso_enabled; 78 + #ifdef CONFIG_X86_64 79 + extern unsigned int vdso64_enabled; 80 + #endif 81 + #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) 82 + extern unsigned int vdso32_enabled; 83 + #endif 79 84 80 85 /* 81 86 * This is used to ensure we don't load something for the wrong architecture. ··· 274 269 275 270 struct task_struct; 276 271 277 - #define ARCH_DLINFO_IA32(vdso_enabled) \ 272 + #define ARCH_DLINFO_IA32 \ 278 273 do { \ 279 - if (vdso_enabled) { \ 274 + if (vdso32_enabled) { \ 280 275 NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \ 281 276 NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \ 282 277 } \ ··· 286 281 287 282 #define STACK_RND_MASK (0x7ff) 288 283 289 - #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) 284 + #define ARCH_DLINFO ARCH_DLINFO_IA32 290 285 291 286 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ 292 287 ··· 297 292 298 293 #define ARCH_DLINFO \ 299 294 do { \ 300 - if (vdso_enabled) \ 295 + if (vdso64_enabled) \ 301 296 NEW_AUX_ENT(AT_SYSINFO_EHDR, \ 302 - (unsigned long)current->mm->context.vdso); \ 297 + (unsigned long __force)current->mm->context.vdso); \ 303 298 } while (0) 304 299 300 + /* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. 
*/ 305 301 #define ARCH_DLINFO_X32 \ 306 302 do { \ 307 - if (vdso_enabled) \ 303 + if (vdso64_enabled) \ 308 304 NEW_AUX_ENT(AT_SYSINFO_EHDR, \ 309 - (unsigned long)current->mm->context.vdso); \ 305 + (unsigned long __force)current->mm->context.vdso); \ 310 306 } while (0) 311 307 312 308 #define AT_SYSINFO 32 ··· 316 310 if (test_thread_flag(TIF_X32)) \ 317 311 ARCH_DLINFO_X32; \ 318 312 else \ 319 - ARCH_DLINFO_IA32(sysctl_vsyscall32) 313 + ARCH_DLINFO_IA32 320 314 321 315 #define COMPAT_ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x1000000) 322 316 ··· 325 319 #define VDSO_CURRENT_BASE ((unsigned long)current->mm->context.vdso) 326 320 327 321 #define VDSO_ENTRY \ 328 - ((unsigned long)VDSO32_SYMBOL(VDSO_CURRENT_BASE, vsyscall)) 322 + ((unsigned long)current->mm->context.vdso + \ 323 + selected_vdso32->sym___kernel_vsyscall) 329 324 330 325 struct linux_binprm; 331 326 332 327 #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 333 328 extern int arch_setup_additional_pages(struct linux_binprm *bprm, 334 329 int uses_interp); 335 - extern int x32_setup_additional_pages(struct linux_binprm *bprm, 336 - int uses_interp); 337 - 338 - extern int syscall32_setup_pages(struct linux_binprm *, int exstack); 339 - #define compat_arch_setup_additional_pages syscall32_setup_pages 330 + extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, 331 + int uses_interp); 332 + #define compat_arch_setup_additional_pages compat_arch_setup_additional_pages 340 333 341 334 extern unsigned long arch_randomize_brk(struct mm_struct *mm); 342 335 #define arch_randomize_brk arch_randomize_brk
+4 -7
arch/x86/include/asm/fixmap.h
··· 24 24 #include <linux/threads.h> 25 25 #include <asm/kmap_types.h> 26 26 #else 27 - #include <asm/vsyscall.h> 27 + #include <uapi/asm/vsyscall.h> 28 28 #endif 29 29 30 30 /* ··· 41 41 extern unsigned long __FIXADDR_TOP; 42 42 #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) 43 43 #else 44 - #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) 44 + #define FIXADDR_TOP (round_up(VSYSCALL_ADDR + PAGE_SIZE, 1<<PMD_SHIFT) - \ 45 + PAGE_SIZE) 45 46 #endif 46 47 47 48 ··· 69 68 #ifdef CONFIG_X86_32 70 69 FIX_HOLE, 71 70 #else 72 - VSYSCALL_LAST_PAGE, 73 - VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE 74 - + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, 75 - VVAR_PAGE, 76 - VSYSCALL_HPET, 71 + VSYSCALL_PAGE = (FIXADDR_TOP - VSYSCALL_ADDR) >> PAGE_SHIFT, 77 72 #ifdef CONFIG_PARAVIRT_CLOCK 78 73 PVCLOCK_FIXMAP_BEGIN, 79 74 PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
+1 -1
arch/x86/include/asm/mmu.h
··· 18 18 #endif 19 19 20 20 struct mutex lock; 21 - void *vdso; 21 + void __user *vdso; 22 22 } mm_context_t; 23 23 24 24 #ifdef CONFIG_SMP
-2
arch/x86/include/asm/proto.h
··· 12 12 void ia32_cstar_target(void); 13 13 void ia32_sysenter_target(void); 14 14 15 - void syscall32_cpu_init(void); 16 - 17 15 void x86_configure_nx(void); 18 16 void x86_report_nx(void); 19 17
+34 -46
arch/x86/include/asm/vdso.h
··· 3 3 4 4 #include <asm/page_types.h> 5 5 #include <linux/linkage.h> 6 + #include <linux/init.h> 6 7 7 - #ifdef __ASSEMBLER__ 8 + #ifndef __ASSEMBLER__ 8 9 9 - #define DEFINE_VDSO_IMAGE(symname, filename) \ 10 - __PAGE_ALIGNED_DATA ; \ 11 - .globl symname##_start, symname##_end ; \ 12 - .align PAGE_SIZE ; \ 13 - symname##_start: ; \ 14 - .incbin filename ; \ 15 - symname##_end: ; \ 16 - .align PAGE_SIZE /* extra data here leaks to userspace. */ ; \ 17 - \ 18 - .previous ; \ 19 - \ 20 - .globl symname##_pages ; \ 21 - .bss ; \ 22 - .align 8 ; \ 23 - .type symname##_pages, @object ; \ 24 - symname##_pages: ; \ 25 - .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \ 26 - .size symname##_pages, .-symname##_pages 10 + #include <linux/mm_types.h> 27 11 28 - #else 12 + struct vdso_image { 13 + void *data; 14 + unsigned long size; /* Always a multiple of PAGE_SIZE */ 29 15 30 - #define DECLARE_VDSO_IMAGE(symname) \ 31 - extern char symname##_start[], symname##_end[]; \ 32 - extern struct page *symname##_pages[] 16 + /* text_mapping.pages is big enough for data/size page pointers */ 17 + struct vm_special_mapping text_mapping; 18 + 19 + unsigned long alt, alt_len; 20 + 21 + unsigned long sym_end_mapping; /* Total size of the mapping */ 22 + 23 + unsigned long sym_vvar_page; 24 + unsigned long sym_hpet_page; 25 + unsigned long sym_VDSO32_NOTE_MASK; 26 + unsigned long sym___kernel_sigreturn; 27 + unsigned long sym___kernel_rt_sigreturn; 28 + unsigned long sym___kernel_vsyscall; 29 + unsigned long sym_VDSO32_SYSENTER_RETURN; 30 + }; 31 + 32 + #ifdef CONFIG_X86_64 33 + extern const struct vdso_image vdso_image_64; 34 + #endif 35 + 36 + #ifdef CONFIG_X86_X32 37 + extern const struct vdso_image vdso_image_x32; 38 + #endif 33 39 34 40 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT 35 - 36 - #include <asm/vdso32.h> 37 - 38 - DECLARE_VDSO_IMAGE(vdso32_int80); 41 + extern const struct vdso_image vdso_image_32_int80; 39 42 #ifdef 
CONFIG_COMPAT 40 - DECLARE_VDSO_IMAGE(vdso32_syscall); 43 + extern const struct vdso_image vdso_image_32_syscall; 41 44 #endif 42 - DECLARE_VDSO_IMAGE(vdso32_sysenter); 45 + extern const struct vdso_image vdso_image_32_sysenter; 43 46 44 - /* 45 - * Given a pointer to the vDSO image, find the pointer to VDSO32_name 46 - * as that symbol is defined in the vDSO sources or linker script. 47 - */ 48 - #define VDSO32_SYMBOL(base, name) \ 49 - ({ \ 50 - extern const char VDSO32_##name[]; \ 51 - (void __user *)(VDSO32_##name + (unsigned long)(base)); \ 52 - }) 47 + extern const struct vdso_image *selected_vdso32; 53 48 #endif 54 49 55 - /* 56 - * These symbols are defined with the addresses in the vsyscall page. 57 - * See vsyscall-sigreturn.S. 58 - */ 59 - extern void __user __kernel_sigreturn; 60 - extern void __user __kernel_rt_sigreturn; 61 - 62 - void __init patch_vdso32(void *vdso, size_t len); 50 + extern void __init init_vdso_image(const struct vdso_image *image); 63 51 64 52 #endif /* __ASSEMBLER__ */ 65 53
-11
arch/x86/include/asm/vdso32.h
··· 1 - #ifndef _ASM_X86_VDSO32_H 2 - #define _ASM_X86_VDSO32_H 3 - 4 - #define VDSO_BASE_PAGE 0 5 - #define VDSO_VVAR_PAGE 1 6 - #define VDSO_HPET_PAGE 2 7 - #define VDSO_PAGES 3 8 - #define VDSO_PREV_PAGES 2 9 - #define VDSO_OFFSET(x) ((x) * PAGE_SIZE) 10 - 11 - #endif
+1 -19
arch/x86/include/asm/vvar.h
··· 29 29 30 30 #else 31 31 32 - #ifdef BUILD_VDSO32 32 + extern char __vvar_page; 33 33 34 34 #define DECLARE_VVAR(offset, type, name) \ 35 35 extern type vvar_ ## name __attribute__((visibility("hidden"))); 36 36 37 37 #define VVAR(name) (vvar_ ## name) 38 - 39 - #else 40 - 41 - extern char __vvar_page; 42 - 43 - /* Base address of vvars. This is not ABI. */ 44 - #ifdef CONFIG_X86_64 45 - #define VVAR_ADDRESS (-10*1024*1024 - 4096) 46 - #else 47 - #define VVAR_ADDRESS (&__vvar_page) 48 - #endif 49 - 50 - #define DECLARE_VVAR(offset, type, name) \ 51 - static type const * const vvaraddr_ ## name = \ 52 - (void *)(VVAR_ADDRESS + (offset)); 53 - 54 - #define VVAR(name) (*vvaraddr_ ## name) 55 - #endif 56 38 57 39 #define DEFINE_VVAR(type, name) \ 58 40 type name \
+1 -6
arch/x86/include/uapi/asm/vsyscall.h
··· 7 7 __NR_vgetcpu, 8 8 }; 9 9 10 - #define VSYSCALL_START (-10UL << 20) 11 - #define VSYSCALL_SIZE 1024 12 - #define VSYSCALL_END (-2UL << 20) 13 - #define VSYSCALL_MAPPED_PAGES 1 14 - #define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr)) 15 - 10 + #define VSYSCALL_ADDR (-10UL << 20) 16 11 17 12 #endif /* _UAPI_ASM_X86_VSYSCALL_H */
+33
arch/x86/kernel/cpu/common.c
··· 20 20 #include <asm/processor.h> 21 21 #include <asm/debugreg.h> 22 22 #include <asm/sections.h> 23 + #include <asm/vsyscall.h> 23 24 #include <linux/topology.h> 24 25 #include <linux/cpumask.h> 25 26 #include <asm/pgtable.h> ··· 953 952 vgetcpu_mode = VGETCPU_RDTSCP; 954 953 else 955 954 vgetcpu_mode = VGETCPU_LSL; 955 + } 956 + 957 + /* May not be __init: called during resume */ 958 + static void syscall32_cpu_init(void) 959 + { 960 + /* Load these always in case some future AMD CPU supports 961 + SYSENTER from compat mode too. */ 962 + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 963 + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 964 + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); 965 + 966 + wrmsrl(MSR_CSTAR, ia32_cstar_target); 967 + } 968 + #endif 969 + 970 + #ifdef CONFIG_X86_32 971 + void enable_sep_cpu(void) 972 + { 973 + int cpu = get_cpu(); 974 + struct tss_struct *tss = &per_cpu(init_tss, cpu); 975 + 976 + if (!boot_cpu_has(X86_FEATURE_SEP)) { 977 + put_cpu(); 978 + return; 979 + } 980 + 981 + tss->x86_tss.ss1 = __KERNEL_CS; 982 + tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; 983 + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 984 + wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); 985 + wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); 986 + put_cpu(); 956 987 } 957 988 #endif 958 989
-3
arch/x86/kernel/hpet.c
··· 74 74 static inline void hpet_set_mapping(void) 75 75 { 76 76 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); 77 - #ifdef CONFIG_X86_64 78 - __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VVAR_NOCACHE); 79 - #endif 80 77 } 81 78 82 79 static inline void hpet_clear_mapping(void)
+4 -2
arch/x86/kernel/signal.c
··· 298 298 } 299 299 300 300 if (current->mm->context.vdso) 301 - restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); 301 + restorer = current->mm->context.vdso + 302 + selected_vdso32->sym___kernel_sigreturn; 302 303 else 303 304 restorer = &frame->retcode; 304 305 if (ksig->ka.sa.sa_flags & SA_RESTORER) ··· 362 361 save_altstack_ex(&frame->uc.uc_stack, regs->sp); 363 362 364 363 /* Set up to return from userspace. */ 365 - restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 364 + restorer = current->mm->context.vdso + 365 + selected_vdso32->sym___kernel_sigreturn; 366 366 if (ksig->ka.sa.sa_flags & SA_RESTORER) 367 367 restorer = ksig->ka.sa.sa_restorer; 368 368 put_user_ex(restorer, &frame->pretcode);
+4 -11
arch/x86/kernel/vsyscall_64.c
··· 91 91 { 92 92 int nr; 93 93 94 - if ((addr & ~0xC00UL) != VSYSCALL_START) 94 + if ((addr & ~0xC00UL) != VSYSCALL_ADDR) 95 95 return -EINVAL; 96 96 97 97 nr = (addr & 0xC00UL) >> 10; ··· 330 330 { 331 331 extern char __vsyscall_page; 332 332 unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); 333 - unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); 334 333 335 - __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, 334 + __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall, 336 335 vsyscall_mode == NATIVE 337 336 ? PAGE_KERNEL_VSYSCALL 338 337 : PAGE_KERNEL_VVAR); 339 - BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) != 340 - (unsigned long)VSYSCALL_START); 341 - 342 - __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR); 343 - BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != 344 - (unsigned long)VVAR_ADDRESS); 338 + BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) != 339 + (unsigned long)VSYSCALL_ADDR); 345 340 } 346 341 347 342 static int __init vsyscall_init(void) 348 343 { 349 - BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); 350 - 351 344 cpu_notifier_register_begin(); 352 345 353 346 on_each_cpu(cpu_vsyscall_init, NULL, 1);
+3 -2
arch/x86/mm/fault.c
··· 18 18 #include <asm/traps.h> /* dotraplinkage, ... */ 19 19 #include <asm/pgalloc.h> /* pgd_*(), ... */ 20 20 #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ 21 - #include <asm/fixmap.h> /* VSYSCALL_START */ 21 + #include <asm/fixmap.h> /* VSYSCALL_ADDR */ 22 + #include <asm/vsyscall.h> /* emulate_vsyscall */ 22 23 23 24 #define CREATE_TRACE_POINTS 24 25 #include <asm/trace/exceptions.h> ··· 772 771 * emulation. 773 772 */ 774 773 if (unlikely((error_code & PF_INSTR) && 775 - ((address & ~0xfff) == VSYSCALL_START))) { 774 + ((address & ~0xfff) == VSYSCALL_ADDR))) { 776 775 if (emulate_vsyscall(regs, address)) 777 776 return; 778 777 }
+14 -15
arch/x86/mm/init_64.c
··· 1055 1055 after_bootmem = 1; 1056 1056 1057 1057 /* Register memory areas for /proc/kcore */ 1058 - kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START, 1059 - VSYSCALL_END - VSYSCALL_START, KCORE_OTHER); 1058 + kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, 1059 + PAGE_SIZE, KCORE_OTHER); 1060 1060 1061 1061 mem_init_print_info(NULL); 1062 1062 } ··· 1185 1185 * covers the 64bit vsyscall page now. 32bit has a real VMA now and does 1186 1186 * not need special handling anymore: 1187 1187 */ 1188 + static const char *gate_vma_name(struct vm_area_struct *vma) 1189 + { 1190 + return "[vsyscall]"; 1191 + } 1192 + static struct vm_operations_struct gate_vma_ops = { 1193 + .name = gate_vma_name, 1194 + }; 1188 1195 static struct vm_area_struct gate_vma = { 1189 - .vm_start = VSYSCALL_START, 1190 - .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES * PAGE_SIZE), 1196 + .vm_start = VSYSCALL_ADDR, 1197 + .vm_end = VSYSCALL_ADDR + PAGE_SIZE, 1191 1198 .vm_page_prot = PAGE_READONLY_EXEC, 1192 - .vm_flags = VM_READ | VM_EXEC 1199 + .vm_flags = VM_READ | VM_EXEC, 1200 + .vm_ops = &gate_vma_ops, 1193 1201 }; 1194 1202 1195 1203 struct vm_area_struct *get_gate_vma(struct mm_struct *mm) ··· 1226 1218 */ 1227 1219 int in_gate_area_no_mm(unsigned long addr) 1228 1220 { 1229 - return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); 1230 - } 1231 - 1232 - const char *arch_vma_name(struct vm_area_struct *vma) 1233 - { 1234 - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) 1235 - return "[vdso]"; 1236 - if (vma == &gate_vma) 1237 - return "[vsyscall]"; 1238 - return NULL; 1221 + return (addr & PAGE_MASK) == VSYSCALL_ADDR; 1239 1222 } 1240 1223 1241 1224 static unsigned long probe_memory_block_size(void)
+6
arch/x86/mm/ioremap.c
··· 367 367 { 368 368 pmd_t *pmd; 369 369 370 + #ifdef CONFIG_X86_64 371 + BUILD_BUG_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1)); 372 + #else 373 + WARN_ON((fix_to_virt(0) + PAGE_SIZE) & ((1 << PMD_SHIFT) - 1)); 374 + #endif 375 + 370 376 early_ioremap_setup(); 371 377 372 378 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+3 -3
arch/x86/mm/pgtable.c
··· 456 456 { 457 457 #ifdef CONFIG_X86_32 458 458 BUG_ON(fixmaps_set > 0); 459 - printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", 460 - (int)-reserve); 461 - __FIXADDR_TOP = -reserve - PAGE_SIZE; 459 + __FIXADDR_TOP = round_down(-reserve, 1 << PMD_SHIFT) - PAGE_SIZE; 460 + printk(KERN_INFO "Reserving virtual address space above 0x%08lx (rounded to 0x%08lx)\n", 461 + -reserve, __FIXADDR_TOP + PAGE_SIZE); 462 462 #endif 463 463 } 464 464
+1 -1
arch/x86/um/vdso/vma.c
··· 12 12 #include <asm/page.h> 13 13 #include <linux/init.h> 14 14 15 - unsigned int __read_mostly vdso_enabled = 1; 15 + static unsigned int __read_mostly vdso_enabled = 1; 16 16 unsigned long um_vdso_addr; 17 17 18 18 extern unsigned long task_size;
+2 -3
arch/x86/vdso/.gitignore
··· 1 1 vdso.lds 2 - vdso-syms.lds 3 2 vdsox32.lds 4 - vdsox32-syms.lds 5 - vdso32-syms.lds 6 3 vdso32-syscall-syms.lds 7 4 vdso32-sysenter-syms.lds 8 5 vdso32-int80-syms.lds 6 + vdso-image-*.c 7 + vdso2c
+32 -58
arch/x86/vdso/Makefile
··· 24 24 25 25 # files to link into kernel 26 26 obj-y += vma.o 27 - obj-$(VDSO64-y) += vdso.o 28 - obj-$(VDSOX32-y) += vdsox32.o 29 - obj-$(VDSO32-y) += vdso32.o vdso32-setup.o 27 + 28 + # vDSO images to build 29 + vdso_img-$(VDSO64-y) += 64 30 + vdso_img-$(VDSOX32-y) += x32 31 + vdso_img-$(VDSO32-y) += 32-int80 32 + vdso_img-$(CONFIG_COMPAT) += 32-syscall 33 + vdso_img-$(VDSO32-y) += 32-sysenter 34 + 35 + obj-$(VDSO32-y) += vdso32-setup.o 30 36 31 37 vobjs := $(foreach F,$(vobj64s),$(obj)/$F) 32 38 33 39 $(obj)/vdso.o: $(obj)/vdso.so 34 40 35 - targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) 41 + targets += vdso.lds $(vobjs-y) 42 + 43 + # Build the vDSO image C files and link them in. 44 + vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) 45 + vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c) 46 + vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) 47 + obj-y += $(vdso_img_objs) 48 + targets += $(vdso_img_cfiles) 49 + targets += $(vdso_img_sodbg) 50 + .SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) 36 51 37 52 export CPPFLAGS_vdso.lds += -P -C 38 53 ··· 56 41 -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ 57 42 $(DISABLE_LTO) 58 43 59 - $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so 60 - 61 - $(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE 44 + $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE 62 45 $(call if_changed,vdso) 63 46 64 - $(obj)/%.so: OBJCOPYFLAGS := -S 65 - $(obj)/%.so: $(obj)/%.so.dbg FORCE 66 - $(call if_changed,objcopy) 47 + hostprogs-y += vdso2c 48 + 49 + quiet_cmd_vdso2c = VDSO2C $@ 50 + define cmd_vdso2c 51 + $(obj)/vdso2c $< $@ 52 + endef 53 + 54 + $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE 55 + $(call if_changed,vdso2c) 67 56 68 57 # 69 58 # Don't omit frame pointers for ease of userspace debugging, but do ··· 87 68 CFLAGS_REMOVE_vgetcpu.o = -pg 88 69 CFLAGS_REMOVE_vvar.o = -pg 89 70 90 - targets += vdso-syms.lds 91 - obj-$(VDSO64-y) += vdso-syms.lds 92 - 93 - # 94 - # Match symbols in the DSO that look like 
VDSO*; produce a file of constants. 95 - # 96 - sed-vdsosym := -e 's/^00*/0/' \ 97 - -e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' 98 - quiet_cmd_vdsosym = VDSOSYM $@ 99 - define cmd_vdsosym 100 - $(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ 101 - endef 102 - 103 - $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE 104 - $(call if_changed,vdsosym) 105 - 106 71 # 107 72 # X32 processes use x32 vDSO to access 64bit kernel data. 108 73 # ··· 96 93 # 3. Build x32 VDSO image with x32 objects, which contains 64bit codes 97 94 # so that it can reach 64bit address space with 64bit pointers. 98 95 # 99 - 100 - targets += vdsox32-syms.lds 101 - obj-$(VDSOX32-y) += vdsox32-syms.lds 102 96 103 97 CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) 104 98 VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ ··· 113 113 $(obj)/%-x32.o: $(obj)/%.o FORCE 114 114 $(call if_changed,x32) 115 115 116 - targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y) 117 - 118 - $(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so 116 + targets += vdsox32.lds $(vobjx32s-y) 119 117 120 118 $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE 121 119 $(call if_changed,vdso) ··· 121 123 # 122 124 # Build multiple 32-bit vDSO images to choose from at boot time. 
123 125 # 124 - obj-$(VDSO32-y) += vdso32-syms.lds 125 126 vdso32.so-$(VDSO32-y) += int80 126 127 vdso32.so-$(CONFIG_COMPAT) += syscall 127 128 vdso32.so-$(VDSO32-y) += sysenter ··· 135 138 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ 136 139 137 140 targets += vdso32/vdso32.lds 138 - targets += $(vdso32-images) $(vdso32-images:=.dbg) 139 141 targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) 140 - 141 - extra-y += $(vdso32-images) 142 + targets += vdso32/vclock_gettime.o 142 143 143 144 $(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) 144 145 ··· 161 166 $(obj)/vdso32/%.o 162 167 $(call if_changed,vdso) 163 168 164 - # Make vdso32-*-syms.lds from each image, and then make sure they match. 165 - # The only difference should be that some do not define VDSO32_SYSENTER_RETURN. 166 - 167 - targets += vdso32-syms.lds $(vdso32.so-y:%=vdso32-%-syms.lds) 168 - 169 - quiet_cmd_vdso32sym = VDSOSYM $@ 170 - define cmd_vdso32sym 171 - if LC_ALL=C sort -u $(filter-out FORCE,$^) > $(@D)/.tmp_$(@F) && \ 172 - $(foreach H,$(filter-out FORCE,$^),\ 173 - if grep -q VDSO32_SYSENTER_RETURN $H; \ 174 - then diff -u $(@D)/.tmp_$(@F) $H; \ 175 - else sed /VDSO32_SYSENTER_RETURN/d $(@D)/.tmp_$(@F) | \ 176 - diff -u - $H; fi &&) : ;\ 177 - then mv -f $(@D)/.tmp_$(@F) $@; \ 178 - else rm -f $(@D)/.tmp_$(@F); exit 1; \ 179 - fi 180 - endef 181 - 182 - $(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE 183 - $(call if_changed,vdso32sym) 184 - 185 169 # 186 170 # The DSO images are built using a special linker script. 187 171 # ··· 171 197 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' 172 198 173 199 VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ 174 - $(LTO_CFLAGS) 200 + -Wl,-Bsymbolic $(LTO_CFLAGS) 175 201 GCOV_PROFILE := n 176 202 177 203 #
+7 -19
arch/x86/vdso/vclock_gettime.c
··· 30 30 extern time_t __vdso_time(time_t *t); 31 31 32 32 #ifdef CONFIG_HPET_TIMER 33 - static inline u32 read_hpet_counter(const volatile void *addr) 33 + extern u8 hpet_page 34 + __attribute__((visibility("hidden"))); 35 + 36 + static notrace cycle_t vread_hpet(void) 34 37 { 35 - return *(const volatile u32 *) (addr + HPET_COUNTER); 38 + return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); 36 39 } 37 40 #endif 38 41 ··· 45 42 #include <asm/vsyscall.h> 46 43 #include <asm/fixmap.h> 47 44 #include <asm/pvclock.h> 48 - 49 - static notrace cycle_t vread_hpet(void) 50 - { 51 - return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET)); 52 - } 53 45 54 46 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 55 47 { ··· 135 137 136 138 #else 137 139 138 - extern u8 hpet_page 139 - __attribute__((visibility("hidden"))); 140 - 141 - #ifdef CONFIG_HPET_TIMER 142 - static notrace cycle_t vread_hpet(void) 143 - { 144 - return read_hpet_counter((const void *)(&hpet_page)); 145 - } 146 - #endif 147 - 148 140 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 149 141 { 150 142 long ret; ··· 142 154 asm( 143 155 "mov %%ebx, %%edx \n" 144 156 "mov %2, %%ebx \n" 145 - "call VDSO32_vsyscall \n" 157 + "call __kernel_vsyscall \n" 146 158 "mov %%edx, %%ebx \n" 147 159 : "=a" (ret) 148 160 : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) ··· 157 169 asm( 158 170 "mov %%ebx, %%edx \n" 159 171 "mov %2, %%ebx \n" 160 - "call VDSO32_vsyscall \n" 172 + "call __kernel_vsyscall \n" 161 173 "mov %%edx, %%ebx \n" 162 174 : "=a" (ret) 163 175 : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
+24 -16
arch/x86/vdso/vdso-layout.lds.S
··· 1 + #include <asm/vdso.h> 2 + 1 3 /* 2 4 * Linker script for vDSO. This is an ELF shared object prelinked to 3 5 * its virtual address, and with only one read-only segment. ··· 8 6 9 7 SECTIONS 10 8 { 11 - #ifdef BUILD_VDSO32 12 - #include <asm/vdso32.h> 13 - 14 - hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE); 15 - 16 - vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE); 17 - 18 - /* Place all vvars at the offsets in asm/vvar.h. */ 19 - #define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset; 20 - #define __VVAR_KERNEL_LDS 21 - #include <asm/vvar.h> 22 - #undef __VVAR_KERNEL_LDS 23 - #undef EMIT_VVAR 24 - #endif 25 9 . = SIZEOF_HEADERS; 26 10 27 11 .hash : { *(.hash) } :text ··· 48 60 .text : { *(.text*) } :text =0x90909090, 49 61 50 62 /* 51 - * The comma above works around a bug in gold: 52 - * https://sourceware.org/bugzilla/show_bug.cgi?id=16804 63 + * The remainder of the vDSO consists of special pages that are 64 + * shared between the kernel and userspace. It needs to be at the 65 + * end so that it doesn't overlap the mapping of the actual 66 + * vDSO image. 53 67 */ 68 + 69 + . = ALIGN(PAGE_SIZE); 70 + vvar_page = .; 71 + 72 + /* Place all vvars at the offsets in asm/vvar.h. */ 73 + #define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset; 74 + #define __VVAR_KERNEL_LDS 75 + #include <asm/vvar.h> 76 + #undef __VVAR_KERNEL_LDS 77 + #undef EMIT_VVAR 78 + 79 + . = vvar_page + PAGE_SIZE; 80 + 81 + hpet_page = .; 82 + . = . + PAGE_SIZE; 83 + 84 + . = ALIGN(PAGE_SIZE); 85 + end_mapping = .; 54 86 55 87 /DISCARD/ : { 56 88 *(.discard)
-3
arch/x86/vdso/vdso.S
··· 1 - #include <asm/vdso.h> 2 - 3 - DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
+1 -6
arch/x86/vdso/vdso.lds.S
··· 1 1 /* 2 2 * Linker script for 64-bit vDSO. 3 3 * We #include the file to define the layout details. 4 - * Here we only choose the prelinked virtual address. 5 4 * 6 5 * This file defines the version script giving the user-exported symbols in 7 - * the DSO. We can define local symbols here called VDSO* to make their 8 - * values visible using the asm-x86/vdso.h macros from the kernel proper. 6 + * the DSO. 9 7 */ 10 8 11 - #define VDSO_PRELINK 0xffffffffff700000 12 9 #include "vdso-layout.lds.S" 13 10 14 11 /* ··· 25 28 local: *; 26 29 }; 27 30 } 28 - 29 - VDSO64_PRELINK = VDSO_PRELINK;
+173
arch/x86/vdso/vdso2c.c
··· 1 + #include <inttypes.h> 2 + #include <stdint.h> 3 + #include <unistd.h> 4 + #include <stdarg.h> 5 + #include <stdlib.h> 6 + #include <stdio.h> 7 + #include <string.h> 8 + #include <fcntl.h> 9 + #include <err.h> 10 + 11 + #include <sys/mman.h> 12 + #include <sys/types.h> 13 + 14 + #include <linux/elf.h> 15 + #include <linux/types.h> 16 + 17 + const char *outfilename; 18 + 19 + /* Symbols that we need in vdso2c. */ 20 + enum { 21 + sym_vvar_page, 22 + sym_hpet_page, 23 + sym_end_mapping, 24 + }; 25 + 26 + const int special_pages[] = { 27 + sym_vvar_page, 28 + sym_hpet_page, 29 + }; 30 + 31 + char const * const required_syms[] = { 32 + [sym_vvar_page] = "vvar_page", 33 + [sym_hpet_page] = "hpet_page", 34 + [sym_end_mapping] = "end_mapping", 35 + "VDSO32_NOTE_MASK", 36 + "VDSO32_SYSENTER_RETURN", 37 + "__kernel_vsyscall", 38 + "__kernel_sigreturn", 39 + "__kernel_rt_sigreturn", 40 + }; 41 + 42 + __attribute__((format(printf, 1, 2))) __attribute__((noreturn)) 43 + static void fail(const char *format, ...) 44 + { 45 + va_list ap; 46 + va_start(ap, format); 47 + fprintf(stderr, "Error: "); 48 + vfprintf(stderr, format, ap); 49 + unlink(outfilename); 50 + exit(1); 51 + va_end(ap); 52 + } 53 + 54 + /* 55 + * Evil macros to do a little-endian read. 
56 + */ 57 + #define GLE(x, bits, ifnot) \ 58 + __builtin_choose_expr( \ 59 + (sizeof(x) == bits/8), \ 60 + (__typeof__(x))le##bits##toh(x), ifnot) 61 + 62 + extern void bad_get_le(uint64_t); 63 + #define LAST_LE(x) \ 64 + __builtin_choose_expr(sizeof(x) == 1, (x), bad_get_le(x)) 65 + 66 + #define GET_LE(x) \ 67 + GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_LE(x)))) 68 + 69 + #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) 70 + 71 + #define BITS 64 72 + #define GOFUNC go64 73 + #define Elf_Ehdr Elf64_Ehdr 74 + #define Elf_Shdr Elf64_Shdr 75 + #define Elf_Phdr Elf64_Phdr 76 + #define Elf_Sym Elf64_Sym 77 + #define Elf_Dyn Elf64_Dyn 78 + #include "vdso2c.h" 79 + #undef BITS 80 + #undef GOFUNC 81 + #undef Elf_Ehdr 82 + #undef Elf_Shdr 83 + #undef Elf_Phdr 84 + #undef Elf_Sym 85 + #undef Elf_Dyn 86 + 87 + #define BITS 32 88 + #define GOFUNC go32 89 + #define Elf_Ehdr Elf32_Ehdr 90 + #define Elf_Shdr Elf32_Shdr 91 + #define Elf_Phdr Elf32_Phdr 92 + #define Elf_Sym Elf32_Sym 93 + #define Elf_Dyn Elf32_Dyn 94 + #include "vdso2c.h" 95 + #undef BITS 96 + #undef GOFUNC 97 + #undef Elf_Ehdr 98 + #undef Elf_Shdr 99 + #undef Elf_Phdr 100 + #undef Elf_Sym 101 + #undef Elf_Dyn 102 + 103 + static void go(void *addr, size_t len, FILE *outfile, const char *name) 104 + { 105 + Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr; 106 + 107 + if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { 108 + go64(addr, len, outfile, name); 109 + } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { 110 + go32(addr, len, outfile, name); 111 + } else { 112 + fail("unknown ELF class\n"); 113 + } 114 + } 115 + 116 + int main(int argc, char **argv) 117 + { 118 + int fd; 119 + off_t len; 120 + void *addr; 121 + FILE *outfile; 122 + char *name, *tmp; 123 + int namelen; 124 + 125 + if (argc != 3) { 126 + printf("Usage: vdso2c INPUT OUTPUT\n"); 127 + return 1; 128 + } 129 + 130 + /* 131 + * Figure out the struct name. If we're writing to a .so file, 132 + * generate raw output insted. 
133 + */ 134 + name = strdup(argv[2]); 135 + namelen = strlen(name); 136 + if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { 137 + name = NULL; 138 + } else { 139 + tmp = strrchr(name, '/'); 140 + if (tmp) 141 + name = tmp + 1; 142 + tmp = strchr(name, '.'); 143 + if (tmp) 144 + *tmp = '\0'; 145 + for (tmp = name; *tmp; tmp++) 146 + if (*tmp == '-') 147 + *tmp = '_'; 148 + } 149 + 150 + fd = open(argv[1], O_RDONLY); 151 + if (fd == -1) 152 + err(1, "%s", argv[1]); 153 + 154 + len = lseek(fd, 0, SEEK_END); 155 + if (len == (off_t)-1) 156 + err(1, "lseek"); 157 + 158 + addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); 159 + if (addr == MAP_FAILED) 160 + err(1, "mmap"); 161 + 162 + outfilename = argv[2]; 163 + outfile = fopen(outfilename, "w"); 164 + if (!outfile) 165 + err(1, "%s", argv[2]); 166 + 167 + go(addr, (size_t)len, outfile, name); 168 + 169 + munmap(addr, len); 170 + fclose(outfile); 171 + 172 + return 0; 173 + }
+163
arch/x86/vdso/vdso2c.h
··· 1 + /* 2 + * This file is included twice from vdso2c.c. It generates code for 32-bit 3 + * and 64-bit vDSOs. We need both for 64-bit builds, since 32-bit vDSOs 4 + * are built for 32-bit userspace. 5 + */ 6 + 7 + static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) 8 + { 9 + int found_load = 0; 10 + unsigned long load_size = -1; /* Work around bogus warning */ 11 + unsigned long data_size; 12 + Elf_Ehdr *hdr = (Elf_Ehdr *)addr; 13 + int i; 14 + unsigned long j; 15 + Elf_Shdr *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, 16 + *alt_sec = NULL; 17 + Elf_Dyn *dyn = 0, *dyn_end = 0; 18 + const char *secstrings; 19 + uint64_t syms[NSYMS] = {}; 20 + 21 + Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(hdr->e_phoff)); 22 + 23 + /* Walk the segment table. */ 24 + for (i = 0; i < GET_LE(hdr->e_phnum); i++) { 25 + if (GET_LE(pt[i].p_type) == PT_LOAD) { 26 + if (found_load) 27 + fail("multiple PT_LOAD segs\n"); 28 + 29 + if (GET_LE(pt[i].p_offset) != 0 || 30 + GET_LE(pt[i].p_vaddr) != 0) 31 + fail("PT_LOAD in wrong place\n"); 32 + 33 + if (GET_LE(pt[i].p_memsz) != GET_LE(pt[i].p_filesz)) 34 + fail("cannot handle memsz != filesz\n"); 35 + 36 + load_size = GET_LE(pt[i].p_memsz); 37 + found_load = 1; 38 + } else if (GET_LE(pt[i].p_type) == PT_DYNAMIC) { 39 + dyn = addr + GET_LE(pt[i].p_offset); 40 + dyn_end = addr + GET_LE(pt[i].p_offset) + 41 + GET_LE(pt[i].p_memsz); 42 + } 43 + } 44 + if (!found_load) 45 + fail("no PT_LOAD seg\n"); 46 + data_size = (load_size + 4095) / 4096 * 4096; 47 + 48 + /* Walk the dynamic table */ 49 + for (i = 0; dyn + i < dyn_end && 50 + GET_LE(dyn[i].d_tag) != DT_NULL; i++) { 51 + typeof(dyn[i].d_tag) tag = GET_LE(dyn[i].d_tag); 52 + if (tag == DT_REL || tag == DT_RELSZ || 53 + tag == DT_RELENT || tag == DT_TEXTREL) 54 + fail("vdso image contains dynamic relocations\n"); 55 + } 56 + 57 + /* Walk the section table */ 58 + secstrings_hdr = addr + GET_LE(hdr->e_shoff) + 59 + GET_LE(hdr->e_shentsize)*GET_LE(hdr->e_shstrndx); 60 + 
secstrings = addr + GET_LE(secstrings_hdr->sh_offset); 61 + for (i = 0; i < GET_LE(hdr->e_shnum); i++) { 62 + Elf_Shdr *sh = addr + GET_LE(hdr->e_shoff) + 63 + GET_LE(hdr->e_shentsize) * i; 64 + if (GET_LE(sh->sh_type) == SHT_SYMTAB) 65 + symtab_hdr = sh; 66 + 67 + if (!strcmp(secstrings + GET_LE(sh->sh_name), 68 + ".altinstructions")) 69 + alt_sec = sh; 70 + } 71 + 72 + if (!symtab_hdr) 73 + fail("no symbol table\n"); 74 + 75 + strtab_hdr = addr + GET_LE(hdr->e_shoff) + 76 + GET_LE(hdr->e_shentsize) * GET_LE(symtab_hdr->sh_link); 77 + 78 + /* Walk the symbol table */ 79 + for (i = 0; 80 + i < GET_LE(symtab_hdr->sh_size) / GET_LE(symtab_hdr->sh_entsize); 81 + i++) { 82 + int k; 83 + Elf_Sym *sym = addr + GET_LE(symtab_hdr->sh_offset) + 84 + GET_LE(symtab_hdr->sh_entsize) * i; 85 + const char *name = addr + GET_LE(strtab_hdr->sh_offset) + 86 + GET_LE(sym->st_name); 87 + for (k = 0; k < NSYMS; k++) { 88 + if (!strcmp(name, required_syms[k])) { 89 + if (syms[k]) { 90 + fail("duplicate symbol %s\n", 91 + required_syms[k]); 92 + } 93 + syms[k] = GET_LE(sym->st_value); 94 + } 95 + } 96 + } 97 + 98 + /* Validate mapping addresses. */ 99 + for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { 100 + if (!syms[i]) 101 + continue; /* The mapping isn't used; ignore it. */ 102 + 103 + if (syms[i] % 4096) 104 + fail("%s must be a multiple of 4096\n", 105 + required_syms[i]); 106 + if (syms[i] < data_size) 107 + fail("%s must be after the text mapping\n", 108 + required_syms[i]); 109 + if (syms[sym_end_mapping] < syms[i] + 4096) 110 + fail("%s overruns end_mapping\n", required_syms[i]); 111 + } 112 + if (syms[sym_end_mapping] % 4096) 113 + fail("end_mapping must be a multiple of 4096\n"); 114 + 115 + /* Remove sections. 
*/ 116 + hdr->e_shoff = 0; 117 + hdr->e_shentsize = 0; 118 + hdr->e_shnum = 0; 119 + hdr->e_shstrndx = htole16(SHN_UNDEF); 120 + 121 + if (!name) { 122 + fwrite(addr, load_size, 1, outfile); 123 + return; 124 + } 125 + 126 + fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n"); 127 + fprintf(outfile, "#include <linux/linkage.h>\n"); 128 + fprintf(outfile, "#include <asm/page_types.h>\n"); 129 + fprintf(outfile, "#include <asm/vdso.h>\n"); 130 + fprintf(outfile, "\n"); 131 + fprintf(outfile, 132 + "static unsigned char raw_data[%lu] __page_aligned_data = {", 133 + data_size); 134 + for (j = 0; j < load_size; j++) { 135 + if (j % 10 == 0) 136 + fprintf(outfile, "\n\t"); 137 + fprintf(outfile, "0x%02X, ", (int)((unsigned char *)addr)[j]); 138 + } 139 + fprintf(outfile, "\n};\n\n"); 140 + 141 + fprintf(outfile, "static struct page *pages[%lu];\n\n", 142 + data_size / 4096); 143 + 144 + fprintf(outfile, "const struct vdso_image %s = {\n", name); 145 + fprintf(outfile, "\t.data = raw_data,\n"); 146 + fprintf(outfile, "\t.size = %lu,\n", data_size); 147 + fprintf(outfile, "\t.text_mapping = {\n"); 148 + fprintf(outfile, "\t\t.name = \"[vdso]\",\n"); 149 + fprintf(outfile, "\t\t.pages = pages,\n"); 150 + fprintf(outfile, "\t},\n"); 151 + if (alt_sec) { 152 + fprintf(outfile, "\t.alt = %lu,\n", 153 + (unsigned long)GET_LE(alt_sec->sh_offset)); 154 + fprintf(outfile, "\t.alt_len = %lu,\n", 155 + (unsigned long)GET_LE(alt_sec->sh_size)); 156 + } 157 + for (i = 0; i < NSYMS; i++) { 158 + if (syms[i]) 159 + fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n", 160 + required_syms[i], syms[i]); 161 + } 162 + fprintf(outfile, "};\n"); 163 + }
+22 -181
arch/x86/vdso/vdso32-setup.c
··· 8 8 9 9 #include <linux/init.h> 10 10 #include <linux/smp.h> 11 - #include <linux/thread_info.h> 12 - #include <linux/sched.h> 13 - #include <linux/gfp.h> 14 - #include <linux/string.h> 15 - #include <linux/elf.h> 16 - #include <linux/mm.h> 17 - #include <linux/err.h> 18 - #include <linux/module.h> 19 - #include <linux/slab.h> 11 + #include <linux/kernel.h> 12 + #include <linux/mm_types.h> 20 13 21 14 #include <asm/cpufeature.h> 22 - #include <asm/msr.h> 23 - #include <asm/pgtable.h> 24 - #include <asm/unistd.h> 25 - #include <asm/elf.h> 26 - #include <asm/tlbflush.h> 15 + #include <asm/processor.h> 27 16 #include <asm/vdso.h> 28 - #include <asm/proto.h> 29 - #include <asm/fixmap.h> 30 - #include <asm/hpet.h> 31 - #include <asm/vvar.h> 32 17 33 18 #ifdef CONFIG_COMPAT_VDSO 34 19 #define VDSO_DEFAULT 0 ··· 21 36 #define VDSO_DEFAULT 1 22 37 #endif 23 38 24 - #ifdef CONFIG_X86_64 25 - #define vdso_enabled sysctl_vsyscall32 26 - #define arch_setup_additional_pages syscall32_setup_pages 27 - #endif 28 - 29 39 /* 30 40 * Should the kernel map a VDSO page into processes and pass its 31 41 * address down to glibc upon exec()? 32 42 */ 33 - unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; 43 + unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT; 34 44 35 - static int __init vdso_setup(char *s) 45 + static int __init vdso32_setup(char *s) 36 46 { 37 - vdso_enabled = simple_strtoul(s, NULL, 0); 47 + vdso32_enabled = simple_strtoul(s, NULL, 0); 38 48 39 - if (vdso_enabled > 1) 49 + if (vdso32_enabled > 1) 40 50 pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n"); 41 51 42 52 return 1; ··· 42 62 * behavior on both 64-bit and 32-bit kernels. 43 63 * On 32-bit kernels, vdso=[012] means the same thing. 
44 64 */ 45 - __setup("vdso32=", vdso_setup); 65 + __setup("vdso32=", vdso32_setup); 46 66 47 67 #ifdef CONFIG_X86_32 48 - __setup_param("vdso=", vdso32_setup, vdso_setup, 0); 49 - 50 - EXPORT_SYMBOL_GPL(vdso_enabled); 68 + __setup_param("vdso=", vdso_setup, vdso32_setup, 0); 51 69 #endif 52 - 53 - static struct page **vdso32_pages; 54 - static unsigned vdso32_size; 55 70 56 71 #ifdef CONFIG_X86_64 57 72 58 73 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) 59 74 #define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) 60 75 61 - /* May not be __init: called during resume */ 62 - void syscall32_cpu_init(void) 63 - { 64 - /* Load these always in case some future AMD CPU supports 65 - SYSENTER from compat mode too. */ 66 - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); 67 - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); 68 - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); 69 - 70 - wrmsrl(MSR_CSTAR, ia32_cstar_target); 71 - } 72 - 73 76 #else /* CONFIG_X86_32 */ 74 77 75 78 #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) 76 79 #define vdso32_syscall() (0) 77 80 78 - void enable_sep_cpu(void) 79 - { 80 - int cpu = get_cpu(); 81 - struct tss_struct *tss = &per_cpu(init_tss, cpu); 82 - 83 - if (!boot_cpu_has(X86_FEATURE_SEP)) { 84 - put_cpu(); 85 - return; 86 - } 87 - 88 - tss->x86_tss.ss1 = __KERNEL_CS; 89 - tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; 90 - wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); 91 - wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); 92 - wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); 93 - put_cpu(); 94 - } 95 - 96 81 #endif /* CONFIG_X86_64 */ 82 + 83 + #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) 84 + const struct vdso_image *selected_vdso32; 85 + #endif 97 86 98 87 int __init sysenter_setup(void) 99 88 { 100 - char *vdso32_start, *vdso32_end; 101 - int npages, i; 102 - 103 89 #ifdef CONFIG_COMPAT 104 - if (vdso32_syscall()) { 105 - 
vdso32_start = vdso32_syscall_start; 106 - vdso32_end = vdso32_syscall_end; 107 - vdso32_pages = vdso32_syscall_pages; 108 - } else 90 + if (vdso32_syscall()) 91 + selected_vdso32 = &vdso_image_32_syscall; 92 + else 109 93 #endif 110 - if (vdso32_sysenter()) { 111 - vdso32_start = vdso32_sysenter_start; 112 - vdso32_end = vdso32_sysenter_end; 113 - vdso32_pages = vdso32_sysenter_pages; 114 - } else { 115 - vdso32_start = vdso32_int80_start; 116 - vdso32_end = vdso32_int80_end; 117 - vdso32_pages = vdso32_int80_pages; 118 - } 94 + if (vdso32_sysenter()) 95 + selected_vdso32 = &vdso_image_32_sysenter; 96 + else 97 + selected_vdso32 = &vdso_image_32_int80; 119 98 120 - npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE; 121 - vdso32_size = npages << PAGE_SHIFT; 122 - for (i = 0; i < npages; i++) 123 - vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE); 124 - 125 - patch_vdso32(vdso32_start, vdso32_size); 99 + init_vdso_image(selected_vdso32); 126 100 127 101 return 0; 128 - } 129 - 130 - /* Setup a VMA at program startup for the vsyscall page */ 131 - int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 132 - { 133 - struct mm_struct *mm = current->mm; 134 - unsigned long addr; 135 - int ret = 0; 136 - struct vm_area_struct *vma; 137 - 138 - #ifdef CONFIG_X86_X32_ABI 139 - if (test_thread_flag(TIF_X32)) 140 - return x32_setup_additional_pages(bprm, uses_interp); 141 - #endif 142 - 143 - if (vdso_enabled != 1) /* Other values all mean "disabled" */ 144 - return 0; 145 - 146 - down_write(&mm->mmap_sem); 147 - 148 - addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0); 149 - if (IS_ERR_VALUE(addr)) { 150 - ret = addr; 151 - goto up_fail; 152 - } 153 - 154 - addr += VDSO_OFFSET(VDSO_PREV_PAGES); 155 - 156 - current->mm->context.vdso = (void *)addr; 157 - 158 - /* 159 - * MAYWRITE to allow gdb to COW and set breakpoints 160 - */ 161 - ret = install_special_mapping(mm, 162 - addr, 163 - 
vdso32_size, 164 - VM_READ|VM_EXEC| 165 - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 166 - vdso32_pages); 167 - 168 - if (ret) 169 - goto up_fail; 170 - 171 - vma = _install_special_mapping(mm, 172 - addr - VDSO_OFFSET(VDSO_PREV_PAGES), 173 - VDSO_OFFSET(VDSO_PREV_PAGES), 174 - VM_READ, 175 - NULL); 176 - 177 - if (IS_ERR(vma)) { 178 - ret = PTR_ERR(vma); 179 - goto up_fail; 180 - } 181 - 182 - ret = remap_pfn_range(vma, 183 - addr - VDSO_OFFSET(VDSO_VVAR_PAGE), 184 - __pa_symbol(&__vvar_page) >> PAGE_SHIFT, 185 - PAGE_SIZE, 186 - PAGE_READONLY); 187 - 188 - if (ret) 189 - goto up_fail; 190 - 191 - #ifdef CONFIG_HPET_TIMER 192 - if (hpet_address) { 193 - ret = io_remap_pfn_range(vma, 194 - addr - VDSO_OFFSET(VDSO_HPET_PAGE), 195 - hpet_address >> PAGE_SHIFT, 196 - PAGE_SIZE, 197 - pgprot_noncached(PAGE_READONLY)); 198 - 199 - if (ret) 200 - goto up_fail; 201 - } 202 - #endif 203 - 204 - current_thread_info()->sysenter_return = 205 - VDSO32_SYMBOL(addr, SYSENTER_RETURN); 206 - 207 - up_fail: 208 - if (ret) 209 - current->mm->context.vdso = NULL; 210 - 211 - up_write(&mm->mmap_sem); 212 - 213 - return ret; 214 102 } 215 103 216 104 #ifdef CONFIG_X86_64 ··· 92 244 static struct ctl_table abi_table2[] = { 93 245 { 94 246 .procname = "vsyscall32", 95 - .data = &sysctl_vsyscall32, 247 + .data = &vdso32_enabled, 96 248 .maxlen = sizeof(int), 97 249 .mode = 0644, 98 250 .proc_handler = proc_dointvec ··· 118 270 #endif 119 271 120 272 #else /* CONFIG_X86_32 */ 121 - 122 - const char *arch_vma_name(struct vm_area_struct *vma) 123 - { 124 - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) 125 - return "[vdso]"; 126 - return NULL; 127 - } 128 273 129 274 struct vm_area_struct *get_gate_vma(struct mm_struct *mm) 130 275 {
-9
arch/x86/vdso/vdso32.S
··· 1 - #include <asm/vdso.h> 2 - 3 - DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so") 4 - 5 - #ifdef CONFIG_COMPAT 6 - DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so") 7 - #endif 8 - 9 - DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so")
+1 -14
arch/x86/vdso/vdso32/vdso32.lds.S
··· 1 1 /* 2 2 * Linker script for 32-bit vDSO. 3 3 * We #include the file to define the layout details. 4 - * Here we only choose the prelinked virtual address. 5 4 * 6 5 * This file defines the version script giving the user-exported symbols in 7 - * the DSO. We can define local symbols here called VDSO* to make their 8 - * values visible using the asm-x86/vdso.h macros from the kernel proper. 6 + * the DSO. 9 7 */ 10 8 11 9 #include <asm/page.h> 12 10 13 11 #define BUILD_VDSO32 14 - #define VDSO_PRELINK 0 15 12 16 13 #include "../vdso-layout.lds.S" 17 14 ··· 35 38 local: *; 36 39 }; 37 40 } 38 - 39 - /* 40 - * Symbols we define here called VDSO* get their values into vdso32-syms.h. 41 - */ 42 - VDSO32_vsyscall = __kernel_vsyscall; 43 - VDSO32_sigreturn = __kernel_sigreturn; 44 - VDSO32_rt_sigreturn = __kernel_rt_sigreturn; 45 - VDSO32_clock_gettime = clock_gettime; 46 - VDSO32_gettimeofday = gettimeofday; 47 - VDSO32_time = time;
-3
arch/x86/vdso/vdsox32.S
··· 1 - #include <asm/vdso.h> 2 - 3 - DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so")
+1 -6
arch/x86/vdso/vdsox32.lds.S
··· 1 1 /* 2 2 * Linker script for x32 vDSO. 3 3 * We #include the file to define the layout details. 4 - * Here we only choose the prelinked virtual address. 5 4 * 6 5 * This file defines the version script giving the user-exported symbols in 7 - * the DSO. We can define local symbols here called VDSO* to make their 8 - * values visible using the asm-x86/vdso.h macros from the kernel proper. 6 + * the DSO. 9 7 */ 10 8 11 - #define VDSO_PRELINK 0 12 9 #include "vdso-layout.lds.S" 13 10 14 11 /* ··· 21 24 local: *; 22 25 }; 23 26 } 24 - 25 - VDSOX32_PRELINK = VDSO_PRELINK;
+133 -109
arch/x86/vdso/vma.c
··· 15 15 #include <asm/proto.h> 16 16 #include <asm/vdso.h> 17 17 #include <asm/page.h> 18 + #include <asm/hpet.h> 18 19 19 20 #if defined(CONFIG_X86_64) 20 - unsigned int __read_mostly vdso_enabled = 1; 21 + unsigned int __read_mostly vdso64_enabled = 1; 21 22 22 - DECLARE_VDSO_IMAGE(vdso); 23 23 extern unsigned short vdso_sync_cpuid; 24 - static unsigned vdso_size; 25 - 26 - #ifdef CONFIG_X86_X32_ABI 27 - DECLARE_VDSO_IMAGE(vdsox32); 28 - static unsigned vdsox32_size; 29 - #endif 30 24 #endif 31 25 32 - #if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \ 33 - defined(CONFIG_COMPAT) 34 - void __init patch_vdso32(void *vdso, size_t len) 26 + void __init init_vdso_image(const struct vdso_image *image) 35 27 { 36 - Elf32_Ehdr *hdr = vdso; 37 - Elf32_Shdr *sechdrs, *alt_sec = 0; 38 - char *secstrings; 39 - void *alt_data; 40 28 int i; 29 + int npages = (image->size) / PAGE_SIZE; 41 30 42 - BUG_ON(len < sizeof(Elf32_Ehdr)); 43 - BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0); 31 + BUG_ON(image->size % PAGE_SIZE != 0); 32 + for (i = 0; i < npages; i++) 33 + image->text_mapping.pages[i] = 34 + virt_to_page(image->data + i*PAGE_SIZE); 44 35 45 - sechdrs = (void *)hdr + hdr->e_shoff; 46 - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 47 - 48 - for (i = 1; i < hdr->e_shnum; i++) { 49 - Elf32_Shdr *shdr = &sechdrs[i]; 50 - if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) { 51 - alt_sec = shdr; 52 - goto found; 53 - } 54 - } 55 - 56 - /* If we get here, it's probably a bug. 
*/ 57 - pr_warning("patch_vdso32: .altinstructions not found\n"); 58 - return; /* nothing to patch */ 59 - 60 - found: 61 - alt_data = (void *)hdr + alt_sec->sh_offset; 62 - apply_alternatives(alt_data, alt_data + alt_sec->sh_size); 36 + apply_alternatives((struct alt_instr *)(image->data + image->alt), 37 + (struct alt_instr *)(image->data + image->alt + 38 + image->alt_len)); 63 39 } 64 - #endif 65 40 66 41 #if defined(CONFIG_X86_64) 67 - static void __init patch_vdso64(void *vdso, size_t len) 68 - { 69 - Elf64_Ehdr *hdr = vdso; 70 - Elf64_Shdr *sechdrs, *alt_sec = 0; 71 - char *secstrings; 72 - void *alt_data; 73 - int i; 74 - 75 - BUG_ON(len < sizeof(Elf64_Ehdr)); 76 - BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0); 77 - 78 - sechdrs = (void *)hdr + hdr->e_shoff; 79 - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 80 - 81 - for (i = 1; i < hdr->e_shnum; i++) { 82 - Elf64_Shdr *shdr = &sechdrs[i]; 83 - if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) { 84 - alt_sec = shdr; 85 - goto found; 86 - } 87 - } 88 - 89 - /* If we get here, it's probably a bug. 
*/ 90 - pr_warning("patch_vdso64: .altinstructions not found\n"); 91 - return; /* nothing to patch */ 92 - 93 - found: 94 - alt_data = (void *)hdr + alt_sec->sh_offset; 95 - apply_alternatives(alt_data, alt_data + alt_sec->sh_size); 96 - } 97 - 98 42 static int __init init_vdso(void) 99 43 { 100 - int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; 101 - int i; 102 - 103 - patch_vdso64(vdso_start, vdso_end - vdso_start); 104 - 105 - vdso_size = npages << PAGE_SHIFT; 106 - for (i = 0; i < npages; i++) 107 - vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); 44 + init_vdso_image(&vdso_image_64); 108 45 109 46 #ifdef CONFIG_X86_X32_ABI 110 - patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start); 111 - npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; 112 - vdsox32_size = npages << PAGE_SHIFT; 113 - for (i = 0; i < npages; i++) 114 - vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE); 47 + init_vdso_image(&vdso_image_x32); 115 48 #endif 116 49 117 50 return 0; 118 51 } 119 52 subsys_initcall(init_vdso); 53 + #endif 120 54 121 55 struct linux_binprm; 122 56 123 57 /* Put the vdso above the (randomized) stack with another randomized offset. 124 58 This way there is no hole in the middle of address space. 125 59 To save memory make sure it is still in the same PTE as the stack top. 126 - This doesn't give that many random bits */ 60 + This doesn't give that many random bits. 61 + 62 + Only used for the 64-bit and x32 vdsos. */ 127 63 static unsigned long vdso_addr(unsigned long start, unsigned len) 128 64 { 129 65 unsigned long addr, end; ··· 85 149 return addr; 86 150 } 87 151 88 - /* Setup a VMA at program startup for the vsyscall page. 
89 - Not called for compat tasks */ 90 - static int setup_additional_pages(struct linux_binprm *bprm, 91 - int uses_interp, 92 - struct page **pages, 93 - unsigned size) 152 + static int map_vdso(const struct vdso_image *image, bool calculate_addr) 94 153 { 95 154 struct mm_struct *mm = current->mm; 155 + struct vm_area_struct *vma; 96 156 unsigned long addr; 97 - int ret; 157 + int ret = 0; 158 + static struct page *no_pages[] = {NULL}; 159 + static struct vm_special_mapping vvar_mapping = { 160 + .name = "[vvar]", 161 + .pages = no_pages, 162 + }; 98 163 99 - if (!vdso_enabled) 100 - return 0; 164 + if (calculate_addr) { 165 + addr = vdso_addr(current->mm->start_stack, 166 + image->sym_end_mapping); 167 + } else { 168 + addr = 0; 169 + } 101 170 102 171 down_write(&mm->mmap_sem); 103 - addr = vdso_addr(mm->start_stack, size); 104 - addr = get_unmapped_area(NULL, addr, size, 0, 0); 172 + 173 + addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0); 105 174 if (IS_ERR_VALUE(addr)) { 106 175 ret = addr; 107 176 goto up_fail; 108 177 } 109 178 110 - current->mm->context.vdso = (void *)addr; 179 + current->mm->context.vdso = (void __user *)addr; 111 180 112 - ret = install_special_mapping(mm, addr, size, 113 - VM_READ|VM_EXEC| 114 - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 115 - pages); 116 - if (ret) { 117 - current->mm->context.vdso = NULL; 181 + /* 182 + * MAYWRITE to allow gdb to COW and set breakpoints 183 + */ 184 + vma = _install_special_mapping(mm, 185 + addr, 186 + image->size, 187 + VM_READ|VM_EXEC| 188 + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 189 + &image->text_mapping); 190 + 191 + if (IS_ERR(vma)) { 192 + ret = PTR_ERR(vma); 118 193 goto up_fail; 119 194 } 120 195 196 + vma = _install_special_mapping(mm, 197 + addr + image->size, 198 + image->sym_end_mapping - image->size, 199 + VM_READ, 200 + &vvar_mapping); 201 + 202 + if (IS_ERR(vma)) { 203 + ret = PTR_ERR(vma); 204 + goto up_fail; 205 + } 206 + 207 + if (image->sym_vvar_page) 208 + ret = 
remap_pfn_range(vma, 209 + addr + image->sym_vvar_page, 210 + __pa_symbol(&__vvar_page) >> PAGE_SHIFT, 211 + PAGE_SIZE, 212 + PAGE_READONLY); 213 + 214 + if (ret) 215 + goto up_fail; 216 + 217 + #ifdef CONFIG_HPET_TIMER 218 + if (hpet_address && image->sym_hpet_page) { 219 + ret = io_remap_pfn_range(vma, 220 + addr + image->sym_hpet_page, 221 + hpet_address >> PAGE_SHIFT, 222 + PAGE_SIZE, 223 + pgprot_noncached(PAGE_READONLY)); 224 + 225 + if (ret) 226 + goto up_fail; 227 + } 228 + #endif 229 + 121 230 up_fail: 231 + if (ret) 232 + current->mm->context.vdso = NULL; 233 + 122 234 up_write(&mm->mmap_sem); 123 235 return ret; 124 236 } 125 237 126 - int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 238 + #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) 239 + static int load_vdso32(void) 127 240 { 128 - return setup_additional_pages(bprm, uses_interp, vdso_pages, 129 - vdso_size); 130 - } 241 + int ret; 131 242 132 - #ifdef CONFIG_X86_X32_ABI 133 - int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 134 - { 135 - return setup_additional_pages(bprm, uses_interp, vdsox32_pages, 136 - vdsox32_size); 243 + if (vdso32_enabled != 1) /* Other values all mean "disabled" */ 244 + return 0; 245 + 246 + ret = map_vdso(selected_vdso32, false); 247 + if (ret) 248 + return ret; 249 + 250 + if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) 251 + current_thread_info()->sysenter_return = 252 + current->mm->context.vdso + 253 + selected_vdso32->sym_VDSO32_SYSENTER_RETURN; 254 + 255 + return 0; 137 256 } 138 257 #endif 139 258 259 + #ifdef CONFIG_X86_64 260 + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 261 + { 262 + if (!vdso64_enabled) 263 + return 0; 264 + 265 + return map_vdso(&vdso_image_64, true); 266 + } 267 + 268 + #ifdef CONFIG_COMPAT 269 + int compat_arch_setup_additional_pages(struct linux_binprm *bprm, 270 + int uses_interp) 271 + { 272 + #ifdef CONFIG_X86_X32_ABI 273 + if 
(test_thread_flag(TIF_X32)) { 274 + if (!vdso64_enabled) 275 + return 0; 276 + 277 + return map_vdso(&vdso_image_x32, true); 278 + } 279 + #endif 280 + 281 + return load_vdso32(); 282 + } 283 + #endif 284 + #else 285 + int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) 286 + { 287 + return load_vdso32(); 288 + } 289 + #endif 290 + 291 + #ifdef CONFIG_X86_64 140 292 static __init int vdso_setup(char *s) 141 293 { 142 - vdso_enabled = simple_strtoul(s, NULL, 0); 294 + vdso64_enabled = simple_strtoul(s, NULL, 0); 143 295 return 0; 144 296 } 145 297 __setup("vdso=", vdso_setup);
+3 -5
arch/x86/xen/mmu.c
··· 1494 1494 page->private = (unsigned long)user_pgd; 1495 1495 1496 1496 if (user_pgd != NULL) { 1497 - user_pgd[pgd_index(VSYSCALL_START)] = 1497 + user_pgd[pgd_index(VSYSCALL_ADDR)] = 1498 1498 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); 1499 1499 ret = 0; 1500 1500 } ··· 2062 2062 case FIX_KMAP_BEGIN ... FIX_KMAP_END: 2063 2063 # endif 2064 2064 #else 2065 - case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: 2066 - case VVAR_PAGE: 2065 + case VSYSCALL_PAGE: 2067 2066 #endif 2068 2067 case FIX_TEXT_POKE0: 2069 2068 case FIX_TEXT_POKE1: ··· 2103 2104 #ifdef CONFIG_X86_64 2104 2105 /* Replicate changes to map the vsyscall page into the user 2105 2106 pagetable vsyscall mapping. */ 2106 - if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) || 2107 - idx == VVAR_PAGE) { 2107 + if (idx == VSYSCALL_PAGE) { 2108 2108 unsigned long vaddr = __fix_to_virt(idx); 2109 2109 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); 2110 2110 }
+9 -2
arch/x86/xen/setup.c
··· 525 525 static void __init fiddle_vdso(void) 526 526 { 527 527 #ifdef CONFIG_X86_32 528 + /* 529 + * This could be called before selected_vdso32 is initialized, so 530 + * just fiddle with both possible images. vdso_image_32_syscall 531 + * can't be selected, since it only exists on 64-bit systems. 532 + */ 528 533 u32 *mask; 529 - mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK); 534 + mask = vdso_image_32_int80.data + 535 + vdso_image_32_int80.sym_VDSO32_NOTE_MASK; 530 536 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; 531 - mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK); 537 + mask = vdso_image_32_sysenter.data + 538 + vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK; 532 539 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; 533 540 #endif 534 541 }
+8
fs/binfmt_elf.c
··· 1108 1108 /* Any vsyscall mappings? */ 1109 1109 if (vma == get_gate_vma(vma->vm_mm)) 1110 1110 return true; 1111 + 1112 + /* 1113 + * Assume that all vmas with a .name op should always be dumped. 1114 + * If this changes, a new vm_ops field can easily be added. 1115 + */ 1116 + if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)) 1117 + return true; 1118 + 1111 1119 /* 1112 1120 * arch_vma_name() returns non-NULL for special architecture mappings, 1113 1121 * such as vDSO sections.
+6
fs/proc/task_mmu.c
··· 300 300 goto done; 301 301 } 302 302 303 + if (vma->vm_ops && vma->vm_ops->name) { 304 + name = vma->vm_ops->name(vma); 305 + if (name) 306 + goto done; 307 + } 308 + 303 309 name = arch_vma_name(vma); 304 310 if (!name) { 305 311 pid_t tid;
+9 -1
include/linux/mm.h
··· 239 239 */ 240 240 int (*access)(struct vm_area_struct *vma, unsigned long addr, 241 241 void *buf, int len, int write); 242 + 243 + /* Called by the /proc/PID/maps code to ask the vma whether it 244 + * has a special name. Returning non-NULL will also cause this 245 + * vma to be dumped unconditionally. */ 246 + const char *(*name)(struct vm_area_struct *vma); 247 + 242 248 #ifdef CONFIG_NUMA 243 249 /* 244 250 * set_policy() op must add a reference to any non-NULL @new mempolicy ··· 1789 1783 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages); 1790 1784 extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, 1791 1785 unsigned long addr, unsigned long len, 1792 - unsigned long flags, struct page **pages); 1786 + unsigned long flags, 1787 + const struct vm_special_mapping *spec); 1788 + /* This is an obsolete alternative to _install_special_mapping. */ 1793 1789 extern int install_special_mapping(struct mm_struct *mm, 1794 1790 unsigned long addr, unsigned long len, 1795 1791 unsigned long flags, struct page **pages);
+6
include/linux/mm_types.h
··· 510 510 } 511 511 #endif 512 512 513 + struct vm_special_mapping 514 + { 515 + const char *name; 516 + struct page **pages; 517 + }; 518 + 513 519 #endif /* _LINUX_MM_TYPES_H */
+5
kernel/sysctl.c
··· 1418 1418 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 1419 1419 { 1420 1420 .procname = "vdso_enabled", 1421 + #ifdef CONFIG_X86_32 1422 + .data = &vdso32_enabled, 1423 + .maxlen = sizeof(vdso32_enabled), 1424 + #else 1421 1425 .data = &vdso_enabled, 1422 1426 .maxlen = sizeof(vdso_enabled), 1427 + #endif 1423 1428 .mode = 0644, 1424 1429 .proc_handler = proc_dointvec, 1425 1430 .extra1 = &zero,
+60 -29
mm/mmap.c
··· 2871 2871 return 1; 2872 2872 } 2873 2873 2874 + static int special_mapping_fault(struct vm_area_struct *vma, 2875 + struct vm_fault *vmf); 2876 + 2877 + /* 2878 + * Having a close hook prevents vma merging regardless of flags. 2879 + */ 2880 + static void special_mapping_close(struct vm_area_struct *vma) 2881 + { 2882 + } 2883 + 2884 + static const char *special_mapping_name(struct vm_area_struct *vma) 2885 + { 2886 + return ((struct vm_special_mapping *)vma->vm_private_data)->name; 2887 + } 2888 + 2889 + static const struct vm_operations_struct special_mapping_vmops = { 2890 + .close = special_mapping_close, 2891 + .fault = special_mapping_fault, 2892 + .name = special_mapping_name, 2893 + }; 2894 + 2895 + static const struct vm_operations_struct legacy_special_mapping_vmops = { 2896 + .close = special_mapping_close, 2897 + .fault = special_mapping_fault, 2898 + }; 2874 2899 2875 2900 static int special_mapping_fault(struct vm_area_struct *vma, 2876 2901 struct vm_fault *vmf) ··· 2911 2886 */ 2912 2887 pgoff = vmf->pgoff - vma->vm_pgoff; 2913 2888 2914 - for (pages = vma->vm_private_data; pgoff && *pages; ++pages) 2889 + if (vma->vm_ops == &legacy_special_mapping_vmops) 2890 + pages = vma->vm_private_data; 2891 + else 2892 + pages = ((struct vm_special_mapping *)vma->vm_private_data)-> 2893 + pages; 2894 + 2895 + for (; pgoff && *pages; ++pages) 2915 2896 pgoff--; 2916 2897 2917 2898 if (*pages) { ··· 2930 2899 return VM_FAULT_SIGBUS; 2931 2900 } 2932 2901 2933 - /* 2934 - * Having a close hook prevents vma merging regardless of flags. 2935 - */ 2936 - static void special_mapping_close(struct vm_area_struct *vma) 2937 - { 2938 - } 2939 - 2940 - static const struct vm_operations_struct special_mapping_vmops = { 2941 - .close = special_mapping_close, 2942 - .fault = special_mapping_fault, 2943 - }; 2944 - 2945 - /* 2946 - * Called with mm->mmap_sem held for writing. 2947 - * Insert a new vma covering the given region, with the given flags. 
2948 - * Its pages are supplied by the given array of struct page *. 2949 - * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. 2950 - * The region past the last page supplied will always produce SIGBUS. 2951 - * The array pointer and the pages it points to are assumed to stay alive 2952 - * for as long as this mapping might exist. 2953 - */ 2954 - struct vm_area_struct *_install_special_mapping(struct mm_struct *mm, 2955 - unsigned long addr, unsigned long len, 2956 - unsigned long vm_flags, struct page **pages) 2902 + static struct vm_area_struct *__install_special_mapping( 2903 + struct mm_struct *mm, 2904 + unsigned long addr, unsigned long len, 2905 + unsigned long vm_flags, const struct vm_operations_struct *ops, 2906 + void *priv) 2957 2907 { 2958 2908 int ret; 2959 2909 struct vm_area_struct *vma; ··· 2951 2939 vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; 2952 2940 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 2953 2941 2954 - vma->vm_ops = &special_mapping_vmops; 2955 - vma->vm_private_data = pages; 2942 + vma->vm_ops = ops; 2943 + vma->vm_private_data = priv; 2956 2944 2957 2945 ret = insert_vm_struct(mm, vma); 2958 2946 if (ret) ··· 2969 2957 return ERR_PTR(ret); 2970 2958 } 2971 2959 2960 + /* 2961 + * Called with mm->mmap_sem held for writing. 2962 + * Insert a new vma covering the given region, with the given flags. 2963 + * Its pages are supplied by the given array of struct page *. 2964 + * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated. 2965 + * The region past the last page supplied will always produce SIGBUS. 2966 + * The array pointer and the pages it points to are assumed to stay alive 2967 + * for as long as this mapping might exist. 
2968 + */ 2969 + struct vm_area_struct *_install_special_mapping( 2970 + struct mm_struct *mm, 2971 + unsigned long addr, unsigned long len, 2972 + unsigned long vm_flags, const struct vm_special_mapping *spec) 2973 + { 2974 + return __install_special_mapping(mm, addr, len, vm_flags, 2975 + &special_mapping_vmops, (void *)spec); 2976 + } 2977 + 2972 2978 int install_special_mapping(struct mm_struct *mm, 2973 2979 unsigned long addr, unsigned long len, 2974 2980 unsigned long vm_flags, struct page **pages) 2975 2981 { 2976 - struct vm_area_struct *vma = _install_special_mapping(mm, 2977 - addr, len, vm_flags, pages); 2982 + struct vm_area_struct *vma = __install_special_mapping( 2983 + mm, addr, len, vm_flags, &legacy_special_mapping_vmops, 2984 + (void *)pages); 2978 2985 2979 2986 return PTR_ERR_OR_ZERO(vma); 2980 2987 }