Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry updates from Ingo Molnar:
"This contains x32 and compat syscall improvements, the biggest one of
which splits x32 syscalls into their own table, which allows new
syscalls to share the x32 and x86-64 number - which turns the
512-547 special syscall numbers range into a legacy wart that won't be
extended going forward"

* 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/syscalls: Split the x32 syscalls into their own table
x86/syscalls: Disallow compat entries for all types of 64-bit syscalls
x86/syscalls: Use the compat versions of rt_sigsuspend() and rt_sigprocmask()
x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long

+168 -32
+7 -6
arch/x86/entry/common.c
··· 285 285 if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) 286 286 nr = syscall_trace_enter(regs); 287 287 288 - /* 289 - * NB: Native and x32 syscalls are dispatched from the same 290 - * table. The only functional difference is the x32 bit in 291 - * regs->orig_ax, which changes the behavior of some syscalls. 292 - */ 293 - nr &= __SYSCALL_MASK; 294 288 if (likely(nr < NR_syscalls)) { 295 289 nr = array_index_nospec(nr, NR_syscalls); 296 290 regs->ax = sys_call_table[nr](regs); 291 + #ifdef CONFIG_X86_X32_ABI 292 + } else if (likely((nr & __X32_SYSCALL_BIT) && 293 + (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) { 294 + nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT, 295 + X32_NR_syscalls); 296 + regs->ax = x32_sys_call_table[nr](regs); 297 + #endif 297 298 } 298 299 299 300 syscall_return_slowpath(regs);
+25
arch/x86/entry/syscall_64.c
··· 10 10 /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */ 11 11 extern asmlinkage long sys_ni_syscall(const struct pt_regs *); 12 12 #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *); 13 + #define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual) 13 14 #include <asm/syscalls_64.h> 14 15 #undef __SYSCALL_64 16 + #undef __SYSCALL_X32 15 17 16 18 #define __SYSCALL_64(nr, sym, qual) [nr] = sym, 19 + #define __SYSCALL_X32(nr, sym, qual) 17 20 18 21 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { 19 22 /* ··· 26 23 [0 ... __NR_syscall_max] = &sys_ni_syscall, 27 24 #include <asm/syscalls_64.h> 28 25 }; 26 + 27 + #undef __SYSCALL_64 28 + #undef __SYSCALL_X32 29 + 30 + #ifdef CONFIG_X86_X32_ABI 31 + 32 + #define __SYSCALL_64(nr, sym, qual) 33 + #define __SYSCALL_X32(nr, sym, qual) [nr] = sym, 34 + 35 + asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = { 36 + /* 37 + * Smells like a compiler bug -- it doesn't work 38 + * when the & below is removed. 39 + */ 40 + [0 ... __NR_syscall_x32_max] = &sys_ni_syscall, 41 + #include <asm/syscalls_64.h> 42 + }; 43 + 44 + #undef __SYSCALL_64 45 + #undef __SYSCALL_X32 46 + 47 + #endif
+2 -2
arch/x86/entry/syscalls/syscall_32.tbl
··· 186 186 172 i386 prctl sys_prctl __ia32_sys_prctl 187 187 173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn 188 188 174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction 189 - 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_sys_rt_sigprocmask 189 + 175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask 190 190 176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending 191 191 177 i386 rt_sigtimedwait sys_rt_sigtimedwait_time32 __ia32_compat_sys_rt_sigtimedwait_time32 192 192 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo __ia32_compat_sys_rt_sigqueueinfo 193 - 179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_sys_rt_sigsuspend 193 + 179 i386 rt_sigsuspend sys_rt_sigsuspend __ia32_compat_sys_rt_sigsuspend 194 194 180 i386 pread64 sys_pread64 __ia32_compat_sys_x86_pread 195 195 181 i386 pwrite64 sys_pwrite64 __ia32_compat_sys_x86_pwrite 196 196 182 i386 chown sys_chown16 __ia32_sys_chown16
+19 -16
arch/x86/entry/syscalls/syscalltbl.sh
··· 1 - #!/bin/sh 1 + #!/bin/bash 2 2 # SPDX-License-Identifier: GPL-2.0 3 3 4 4 in="$1" 5 5 out="$2" 6 6 7 7 syscall_macro() { 8 - abi="$1" 9 - nr="$2" 10 - entry="$3" 8 + local abi="$1" 9 + local nr="$2" 10 + local entry="$3" 11 11 12 12 # Entry can be either just a function name or "function/qualifier" 13 13 real_entry="${entry%%/*}" ··· 21 21 } 22 22 23 23 emit() { 24 - abi="$1" 25 - nr="$2" 26 - entry="$3" 27 - compat="$4" 28 - umlentry="" 24 + local abi="$1" 25 + local nr="$2" 26 + local entry="$3" 27 + local compat="$4" 28 + local umlentry="" 29 29 30 - if [ "$abi" = "64" -a -n "$compat" ]; then 31 - echo "a compat entry for a 64-bit syscall makes no sense" >&2 30 + if [ "$abi" != "I386" -a -n "$compat" ]; then 31 + echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2 32 32 exit 1 33 33 fi 34 34 ··· 62 62 while read nr abi name entry compat; do 63 63 abi=`echo "$abi" | tr '[a-z]' '[A-Z]'` 64 64 if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then 65 - # COMMON is the same as 64, except that we don't expect X32 66 - # programs to use it. Our expectation has nothing to do with 67 - # any generated code, so treat them the same. 68 65 emit 64 "$nr" "$entry" "$compat" 66 + if [ "$abi" = "COMMON" ]; then 67 + # COMMON means that this syscall exists in the same form for 68 + # 64-bit and X32. 69 + echo "#ifdef CONFIG_X86_X32_ABI" 70 + emit X32 "$nr" "$entry" "$compat" 71 + echo "#endif" 72 + fi 69 73 elif [ "$abi" = "X32" ]; then 70 - # X32 is equivalent to 64 on an X32-compatible kernel. 71 74 echo "#ifdef CONFIG_X86_X32_ABI" 72 - emit 64 "$nr" "$entry" "$compat" 75 + emit X32 "$nr" "$entry" "$compat" 73 76 echo "#endif" 74 77 elif [ "$abi" = "I386" ]; then 75 78 emit "$abi" "$nr" "$entry" "$compat"
+4
arch/x86/include/asm/syscall.h
··· 36 36 extern const sys_call_ptr_t ia32_sys_call_table[]; 37 37 #endif 38 38 39 + #ifdef CONFIG_X86_X32_ABI 40 + extern const sys_call_ptr_t x32_sys_call_table[]; 41 + #endif 42 + 39 43 /* 40 44 * Only the low 32 bits of orig_ax are meaningful, so we return int. 41 45 * This importantly ignores the high bits on 64-bit, so comparisons
-6
arch/x86/include/asm/unistd.h
··· 5 5 #include <uapi/asm/unistd.h> 6 6 7 7 8 - # ifdef CONFIG_X86_X32_ABI 9 - # define __SYSCALL_MASK (~(__X32_SYSCALL_BIT)) 10 - # else 11 - # define __SYSCALL_MASK (~0) 12 - # endif 13 - 14 8 # ifdef CONFIG_X86_32 15 9 16 10 # include <asm/unistd_32.h>
+1 -1
arch/x86/include/uapi/asm/unistd.h
··· 3 3 #define _UAPI_ASM_X86_UNISTD_H 4 4 5 5 /* x32 syscall flag bit */ 6 - #define __X32_SYSCALL_BIT 0x40000000 6 + #define __X32_SYSCALL_BIT 0x40000000UL 7 7 8 8 #ifndef __KERNEL__ 9 9 # ifdef __i386__
+20
arch/x86/kernel/asm-offsets_64.c
··· 6 6 #include <asm/ia32.h> 7 7 8 8 #define __SYSCALL_64(nr, sym, qual) [nr] = 1, 9 + #define __SYSCALL_X32(nr, sym, qual) 9 10 static char syscalls_64[] = { 10 11 #include <asm/syscalls_64.h> 11 12 }; 13 + #undef __SYSCALL_64 14 + #undef __SYSCALL_X32 15 + 16 + #ifdef CONFIG_X86_X32_ABI 17 + #define __SYSCALL_64(nr, sym, qual) 18 + #define __SYSCALL_X32(nr, sym, qual) [nr] = 1, 19 + static char syscalls_x32[] = { 20 + #include <asm/syscalls_64.h> 21 + }; 22 + #undef __SYSCALL_64 23 + #undef __SYSCALL_X32 24 + #endif 25 + 12 26 #define __SYSCALL_I386(nr, sym, qual) [nr] = 1, 13 27 static char syscalls_ia32[] = { 14 28 #include <asm/syscalls_32.h> 15 29 }; 30 + #undef __SYSCALL_I386 16 31 17 32 #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) 18 33 #include <asm/kvm_para.h> ··· 94 79 95 80 DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1); 96 81 DEFINE(NR_syscalls, sizeof(syscalls_64)); 82 + 83 + #ifdef CONFIG_X86_X32_ABI 84 + DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1); 85 + DEFINE(X32_NR_syscalls, sizeof(syscalls_x32)); 86 + #endif 97 87 98 88 DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1); 99 89 DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
+1 -1
tools/testing/selftests/x86/Makefile
··· 17 17 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ 18 18 test_FCMOV test_FCOMI test_FISTTP \ 19 19 vdso_restorer 20 - TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip 20 + TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering 21 21 # Some selftests require 32bit support enabled also on 64bit systems 22 22 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall 23 23
+89
tools/testing/selftests/x86/syscall_numbering.c
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * syscall_numbering.c - tests that invalid x86-64/x32 syscall numbers return ENOSYS 4 + * Copyright (c) 2018 Andrew Lutomirski 5 + */ 6 + 7 + #define _GNU_SOURCE 8 + 9 + #include <stdlib.h> 10 + #include <stdio.h> 11 + #include <stdbool.h> 12 + #include <errno.h> 13 + #include <unistd.h> 14 + #include <syscall.h> 15 + 16 + static int nerrs; 17 + 18 + #define X32_BIT 0x40000000UL 19 + 20 + static void check_enosys(unsigned long nr, bool *ok) 21 + { 22 + /* If this fails, a segfault is reasonably likely. */ 23 + fflush(stdout); 24 + 25 + long ret = syscall(nr, 0, 0, 0, 0, 0, 0); 26 + if (ret == 0) { 27 + printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr); 28 + *ok = false; 29 + } else if (errno != ENOSYS) { 30 + printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno); 31 + *ok = false; 32 + } 33 + } 34 + 35 + static void test_x32_without_x32_bit(void) 36 + { 37 + bool ok = true; 38 + 39 + /* 40 + * Syscalls 512-547 are "x32" syscalls. They are intended to be 41 + * called with the x32 (0x40000000) bit set. Calling them without 42 + * the x32 bit set is nonsense and should not work. 43 + */ 44 + printf("[RUN]\tChecking syscalls 512-547\n"); 45 + for (int i = 512; i <= 547; i++) 46 + check_enosys(i, &ok); 47 + 48 + /* 49 + * Check that a handful of 64-bit-only syscalls are rejected if the x32 50 + * bit is set. 51 + */ 52 + printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n"); 53 + check_enosys(16 | X32_BIT, &ok); /* ioctl */ 54 + check_enosys(19 | X32_BIT, &ok); /* readv */ 55 + check_enosys(20 | X32_BIT, &ok); /* writev */ 56 + 57 + /* 58 + * Check some syscalls with high bits set. 
59 + */ 60 + printf("[RUN]\tChecking numbers above 2^32-1\n"); 61 + check_enosys((1UL << 32), &ok); 62 + check_enosys(X32_BIT | (1UL << 32), &ok); 63 + 64 + if (!ok) 65 + nerrs++; 66 + else 67 + printf("[OK]\tThey all returned -ENOSYS\n"); 68 + } 69 + 70 + int main() 71 + { 72 + /* 73 + * Anyone diagnosing a failure will want to know whether the kernel 74 + * supports x32. Tell them. 75 + */ 76 + printf("\tChecking for x32..."); 77 + fflush(stdout); 78 + if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) { 79 + printf(" supported\n"); 80 + } else if (errno == ENOSYS) { 81 + printf(" not supported\n"); 82 + } else { 83 + printf(" confused\n"); 84 + } 85 + 86 + test_x32_without_x32_bit(); 87 + 88 + return nerrs ? 1 : 0; 89 + }