Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/vdso: Remove runtime 32-bit vDSO selection

32-bit userspace will now always see the same vDSO, which is
exactly what used to be the int80 vDSO. Subsequent patches will
clean it up and make it support SYSENTER and SYSCALL using
alternatives.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/e7e6b3526fa442502e6125fe69486aab50813c32.1444091584.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Andy Lutomirski; committed by Ingo Molnar.
0a6d1fa0 b611acf4

Total diffstat: +21 -279
+11 -24
arch/x86/entry/vdso/Makefile
··· 19 19 # vDSO images to build 20 20 vdso_img-$(VDSO64-y) += 64 21 21 vdso_img-$(VDSOX32-y) += x32 22 - vdso_img-$(VDSO32-y) += 32-int80 23 - vdso_img-$(CONFIG_IA32_EMULATION) += 32-syscall 24 - vdso_img-$(VDSO32-y) += 32-sysenter 22 + vdso_img-$(VDSO32-y) += 32 25 23 26 24 obj-$(VDSO32-y) += vdso32-setup.o 27 25 ··· 120 122 $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE 121 123 $(call if_changed,vdso) 122 124 123 - # 124 - # Build multiple 32-bit vDSO images to choose from at boot time. 125 - # 126 - vdso32.so-$(VDSO32-y) += int80 127 - vdso32.so-$(CONFIG_IA32_EMULATION) += syscall 128 - vdso32.so-$(VDSO32-y) += sysenter 129 - 130 - vdso32-images = $(vdso32.so-y:%=vdso32-%.so) 131 - 132 125 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) 133 126 VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 134 127 ··· 128 139 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ 129 140 130 141 targets += vdso32/vdso32.lds 131 - targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) 142 + targets += vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o 132 143 targets += vdso32/vclock_gettime.o 133 144 134 - $(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) 135 - 136 145 KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) 137 - $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) 138 - $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 146 + $(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) 147 + $(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32 139 148 140 149 KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) 141 150 KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) ··· 144 157 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) 145 158 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer 146 159 KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING 147 - $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) 160 + $(obj)/vdso32.so.dbg: 
KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) 148 161 149 - $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ 150 - $(obj)/vdso32/vdso32.lds \ 151 - $(obj)/vdso32/vclock_gettime.o \ 152 - $(obj)/vdso32/note.o \ 153 - $(obj)/vdso32/%.o 162 + $(obj)/vdso32.so.dbg: FORCE \ 163 + $(obj)/vdso32/vdso32.lds \ 164 + $(obj)/vdso32/vclock_gettime.o \ 165 + $(obj)/vdso32/note.o \ 166 + $(obj)/vdso32/system_call.o 154 167 $(call if_changed,vdso) 155 168 156 169 # ··· 193 206 PHONY += vdso_install $(vdso_img_insttargets) 194 207 vdso_install: $(vdso_img_insttargets) FORCE 195 208 196 - clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so* 209 + clean-files := vdso32.so vdso32.so.dbg vdso64* vdso-image-*.c vdsox32.so*
-1
arch/x86/entry/vdso/vdso2c.c
··· 98 98 "VDSO_FAKE_SECTION_TABLE_END", false 99 99 }, 100 100 {"VDSO32_NOTE_MASK", true}, 101 - {"VDSO32_SYSENTER_RETURN", true}, 102 101 {"__kernel_vsyscall", true}, 103 102 {"__kernel_sigreturn", true}, 104 103 {"__kernel_rt_sigreturn", true},
+1 -27
arch/x86/entry/vdso/vdso32-setup.c
··· 48 48 __setup_param("vdso=", vdso_setup, vdso32_setup, 0); 49 49 #endif 50 50 51 - #ifdef CONFIG_X86_64 52 - 53 - #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) 54 - #define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) 55 - 56 - #else /* CONFIG_X86_32 */ 57 - 58 - #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) 59 - #define vdso32_syscall() (0) 60 - 61 - #endif /* CONFIG_X86_64 */ 62 - 63 - #if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) 64 - const struct vdso_image *selected_vdso32; 65 - #endif 66 - 67 51 int __init sysenter_setup(void) 68 52 { 69 - #ifdef CONFIG_COMPAT 70 - if (vdso32_syscall()) 71 - selected_vdso32 = &vdso_image_32_syscall; 72 - else 73 - #endif 74 - if (vdso32_sysenter()) 75 - selected_vdso32 = &vdso_image_32_sysenter; 76 - else 77 - selected_vdso32 = &vdso_image_32_int80; 78 - 79 - init_vdso_image(selected_vdso32); 53 + init_vdso_image(&vdso_image_32); 80 54 81 55 return 0; 82 56 }
arch/x86/entry/vdso/vdso32/int80.S → arch/x86/entry/vdso/vdso32/system_call.S (file renamed)
-75
arch/x86/entry/vdso/vdso32/syscall.S
··· 1 - /* 2 - * Code for the vDSO. This version uses the syscall instruction. 3 - * 4 - * First get the common code for the sigreturn entry points. 5 - * This must come first. 6 - */ 7 - #define SYSCALL_ENTER_KERNEL syscall 8 - #include "sigreturn.S" 9 - 10 - #include <asm/segment.h> 11 - 12 - .text 13 - .globl __kernel_vsyscall 14 - .type __kernel_vsyscall,@function 15 - ALIGN 16 - __kernel_vsyscall: 17 - .LSTART_vsyscall: 18 - push %ebp 19 - .Lpush_ebp: 20 - movl %ecx, %ebp 21 - syscall 22 - movl %ebp, %ecx 23 - popl %ebp 24 - .Lpop_ebp: 25 - ret 26 - .LEND_vsyscall: 27 - .size __kernel_vsyscall,.-.LSTART_vsyscall 28 - 29 - .section .eh_frame,"a",@progbits 30 - .LSTARTFRAME: 31 - .long .LENDCIE-.LSTARTCIE 32 - .LSTARTCIE: 33 - .long 0 /* CIE ID */ 34 - .byte 1 /* Version number */ 35 - .string "zR" /* NUL-terminated augmentation string */ 36 - .uleb128 1 /* Code alignment factor */ 37 - .sleb128 -4 /* Data alignment factor */ 38 - .byte 8 /* Return address register column */ 39 - .uleb128 1 /* Augmentation value length */ 40 - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ 41 - .byte 0x0c /* DW_CFA_def_cfa */ 42 - .uleb128 4 43 - .uleb128 4 44 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ 45 - .uleb128 1 46 - .align 4 47 - .LENDCIE: 48 - 49 - .long .LENDFDE1-.LSTARTFDE1 /* Length FDE */ 50 - .LSTARTFDE1: 51 - .long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */ 52 - .long .LSTART_vsyscall-. /* PC-relative start address */ 53 - .long .LEND_vsyscall-.LSTART_vsyscall 54 - .uleb128 0 /* Augmentation length */ 55 - /* What follows are the instructions for the table generation. 56 - We have to record all changes of the stack pointer. 
*/ 57 - .byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */ 58 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 59 - .uleb128 8 60 - .byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */ 61 - .byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */ 62 - .byte 0xc5 /* DW_CFA_restore %ebp */ 63 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 64 - .uleb128 4 65 - .align 4 66 - .LENDFDE1: 67 - .previous 68 - 69 - /* 70 - * Pad out the segment to match the size of the sysenter.S version. 71 - */ 72 - VDSO32_vsyscall_eh_frame_size = 0x40 73 - .section .data,"aw",@progbits 74 - .space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0 75 - .previous
-116
arch/x86/entry/vdso/vdso32/sysenter.S
··· 1 - /* 2 - * Code for the vDSO. This version uses the sysenter instruction. 3 - * 4 - * First get the common code for the sigreturn entry points. 5 - * This must come first. 6 - */ 7 - #include "sigreturn.S" 8 - 9 - /* 10 - * The caller puts arg2 in %ecx, which gets pushed. The kernel will use 11 - * %ecx itself for arg2. The pushing is because the sysexit instruction 12 - * (found in entry.S) requires that we clobber %ecx with the desired %esp. 13 - * User code might expect that %ecx is unclobbered though, as it would be 14 - * for returning via the iret instruction, so we must push and pop. 15 - * 16 - * The caller puts arg3 in %edx, which the sysexit instruction requires 17 - * for %eip. Thus, exactly as for arg2, we must push and pop. 18 - * 19 - * Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter 20 - * instruction clobbers %esp, the user's %esp won't even survive entry 21 - * into the kernel. We store %esp in %ebp. Code in entry.S must fetch 22 - * arg6 from the stack. 23 - * 24 - * You can not use this vsyscall for the clone() syscall because the 25 - * three words on the parent stack do not get copied to the child. 26 - */ 27 - .text 28 - .globl __kernel_vsyscall 29 - .type __kernel_vsyscall,@function 30 - ALIGN 31 - __kernel_vsyscall: 32 - .LSTART_vsyscall: 33 - push %ecx 34 - .Lpush_ecx: 35 - push %edx 36 - .Lpush_edx: 37 - push %ebp 38 - .Lenter_kernel: 39 - movl %esp,%ebp 40 - sysenter 41 - 42 - /* 7: align return point with nop's to make disassembly easier */ 43 - .space 7,0x90 44 - 45 - /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ 46 - int $0x80 47 - /* 16: System call normal return point is here! 
*/ 48 - VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */ 49 - pop %ebp 50 - .Lpop_ebp: 51 - pop %edx 52 - .Lpop_edx: 53 - pop %ecx 54 - .Lpop_ecx: 55 - ret 56 - .LEND_vsyscall: 57 - .size __kernel_vsyscall,.-.LSTART_vsyscall 58 - .previous 59 - 60 - .section .eh_frame,"a",@progbits 61 - .LSTARTFRAMEDLSI: 62 - .long .LENDCIEDLSI-.LSTARTCIEDLSI 63 - .LSTARTCIEDLSI: 64 - .long 0 /* CIE ID */ 65 - .byte 1 /* Version number */ 66 - .string "zR" /* NUL-terminated augmentation string */ 67 - .uleb128 1 /* Code alignment factor */ 68 - .sleb128 -4 /* Data alignment factor */ 69 - .byte 8 /* Return address register column */ 70 - .uleb128 1 /* Augmentation value length */ 71 - .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */ 72 - .byte 0x0c /* DW_CFA_def_cfa */ 73 - .uleb128 4 74 - .uleb128 4 75 - .byte 0x88 /* DW_CFA_offset, column 0x8 */ 76 - .uleb128 1 77 - .align 4 78 - .LENDCIEDLSI: 79 - .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */ 80 - .LSTARTFDEDLSI: 81 - .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */ 82 - .long .LSTART_vsyscall-. /* PC-relative start address */ 83 - .long .LEND_vsyscall-.LSTART_vsyscall 84 - .uleb128 0 85 - /* What follows are the instructions for the table generation. 86 - We have to record all changes of the stack pointer. */ 87 - .byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */ 88 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 89 - .byte 0x08 /* RA at offset 8 now */ 90 - .byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */ 91 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 92 - .byte 0x0c /* RA at offset 12 now */ 93 - .byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */ 94 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 95 - .byte 0x10 /* RA at offset 16 now */ 96 - .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */ 97 - /* Finally the epilogue. 
*/ 98 - .byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */ 99 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 100 - .byte 0x0c /* RA at offset 12 now */ 101 - .byte 0xc5 /* DW_CFA_restore %ebp */ 102 - .byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */ 103 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 104 - .byte 0x08 /* RA at offset 8 now */ 105 - .byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */ 106 - .byte 0x0e /* DW_CFA_def_cfa_offset */ 107 - .byte 0x04 /* RA at offset 4 now */ 108 - .align 4 109 - .LENDFDEDLSI: 110 - .previous 111 - 112 - /* 113 - * Emit a symbol with the size of this .eh_frame data, 114 - * to verify it matches the other versions. 115 - */ 116 - VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
+1 -12
arch/x86/entry/vdso/vma.c
··· 180 180 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 181 181 static int load_vdso32(void) 182 182 { 183 - int ret; 184 - 185 183 if (vdso32_enabled != 1) /* Other values all mean "disabled" */ 186 184 return 0; 187 185 188 - ret = map_vdso(selected_vdso32, false); 189 - if (ret) 190 - return ret; 191 - 192 - if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) 193 - current_thread_info()->sysenter_return = 194 - current->mm->context.vdso + 195 - selected_vdso32->sym_VDSO32_SYSENTER_RETURN; 196 - 197 - return 0; 186 + return map_vdso(&vdso_image_32, false); 198 187 } 199 188 #endif 200 189
+2 -2
arch/x86/ia32/ia32_signal.c
··· 289 289 /* Return stub is in 32bit vsyscall page */ 290 290 if (current->mm->context.vdso) 291 291 restorer = current->mm->context.vdso + 292 - selected_vdso32->sym___kernel_sigreturn; 292 + vdso_image_32.sym___kernel_sigreturn; 293 293 else 294 294 restorer = &frame->retcode; 295 295 } ··· 368 368 restorer = ksig->ka.sa.sa_restorer; 369 369 else 370 370 restorer = current->mm->context.vdso + 371 - selected_vdso32->sym___kernel_rt_sigreturn; 371 + vdso_image_32.sym___kernel_rt_sigreturn; 372 372 put_user_ex(ptr_to_compat(restorer), &frame->pretcode); 373 373 374 374 /*
+1 -1
arch/x86/include/asm/elf.h
··· 328 328 329 329 #define VDSO_ENTRY \ 330 330 ((unsigned long)current->mm->context.vdso + \ 331 - selected_vdso32->sym___kernel_vsyscall) 331 + vdso_image_32.sym___kernel_vsyscall) 332 332 333 333 struct linux_binprm; 334 334
+1 -8
arch/x86/include/asm/vdso.h
··· 26 26 long sym___kernel_sigreturn; 27 27 long sym___kernel_rt_sigreturn; 28 28 long sym___kernel_vsyscall; 29 - long sym_VDSO32_SYSENTER_RETURN; 30 29 }; 31 30 32 31 #ifdef CONFIG_X86_64 ··· 37 38 #endif 38 39 39 40 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT 40 - extern const struct vdso_image vdso_image_32_int80; 41 - #ifdef CONFIG_COMPAT 42 - extern const struct vdso_image vdso_image_32_syscall; 43 - #endif 44 - extern const struct vdso_image vdso_image_32_sysenter; 45 - 46 - extern const struct vdso_image *selected_vdso32; 41 + extern const struct vdso_image vdso_image_32; 47 42 #endif 48 43 49 44 extern void __init init_vdso_image(const struct vdso_image *image);
+2 -2
arch/x86/kernel/signal.c
··· 299 299 300 300 if (current->mm->context.vdso) 301 301 restorer = current->mm->context.vdso + 302 - selected_vdso32->sym___kernel_sigreturn; 302 + vdso_image_32.sym___kernel_sigreturn; 303 303 else 304 304 restorer = &frame->retcode; 305 305 if (ksig->ka.sa.sa_flags & SA_RESTORER) ··· 363 363 364 364 /* Set up to return from userspace. */ 365 365 restorer = current->mm->context.vdso + 366 - selected_vdso32->sym___kernel_rt_sigreturn; 366 + vdso_image_32.sym___kernel_rt_sigreturn; 367 367 if (ksig->ka.sa.sa_flags & SA_RESTORER) 368 368 restorer = ksig->ka.sa.sa_restorer; 369 369 put_user_ex(restorer, &frame->pretcode);
+2 -11
arch/x86/xen/setup.c
··· 965 965 static void __init fiddle_vdso(void) 966 966 { 967 967 #ifdef CONFIG_X86_32 968 - /* 969 - * This could be called before selected_vdso32 is initialized, so 970 - * just fiddle with both possible images. vdso_image_32_syscall 971 - * can't be selected, since it only exists on 64-bit systems. 972 - */ 973 - u32 *mask; 974 - mask = vdso_image_32_int80.data + 975 - vdso_image_32_int80.sym_VDSO32_NOTE_MASK; 976 - *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; 977 - mask = vdso_image_32_sysenter.data + 978 - vdso_image_32_sysenter.sym_VDSO32_NOTE_MASK; 968 + u32 *mask = vdso_image_32.data + 969 + vdso_image_32.sym_VDSO32_NOTE_MASK; 979 970 *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; 980 971 #endif 981 972 }