Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm64: fix compat syscall return truncation

Due to inconsistencies in the way we manipulate compat GPRs, we have a
few issues today:

* For audit and tracing, where error codes are handled as a (native)
long, negative error codes are expected to be sign-extended to the
native 64-bits, or they may fail to be matched correctly. Thus a
syscall which fails with an error may erroneously be identified as
succeeding.

* For ptrace, *all* compat return values should be sign-extended for
consistency with 32-bit arm, but we currently only do this for
negative return codes.

* As we may transiently set the upper 32 bits of some compat GPRs while
in the kernel, these can be sampled by perf, which is somewhat
confusing. This means that where a syscall returns a pointer above 2G,
this will be sign-extended, but will not be mistaken for an error as
error codes are constrained to the inclusive range [-4095, -1] where
no user pointer can exist.

To fix all of these, we must consistently use helpers to get/set the
compat GPRs, ensuring that we never write the upper 32 bits of the
return code, and always sign-extend when reading the return code. This
patch does so, with the following changes:

* We re-organise syscall_get_return_value() to always sign-extend for
compat tasks, and reimplement syscall_get_error() atop. We update
syscall_trace_exit() to use syscall_get_return_value().

* We consistently use syscall_set_return_value() to set the return
value, ensuring the upper 32 bits are never set unexpectedly.

* As the core audit code currently uses regs_return_value() rather than
syscall_get_return_value(), we special-case this for
compat_user_mode(regs) such that this will do the right thing. Going
forward, we should try to move the core audit code over to
syscall_get_return_value().

Cc: <stable@vger.kernel.org>
Reported-by: He Zhe <zhe.he@windriver.com>
Reported-by: weiyuchen <weiyuchen3@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210802104200.21390-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>

authored by

Mark Rutland and committed by
Will Deacon
e30e8d46 d8a71905

+28 -19
+11 -1
arch/arm64/include/asm/ptrace.h
··· 320 320 321 321 static inline unsigned long regs_return_value(struct pt_regs *regs) 322 322 { 323 - return regs->regs[0]; 323 + unsigned long val = regs->regs[0]; 324 + 325 + /* 326 + * Audit currently uses regs_return_value() instead of 327 + * syscall_get_return_value(). Apply the same sign-extension here until 328 + * audit is updated to use syscall_get_return_value(). 329 + */ 330 + if (compat_user_mode(regs)) 331 + val = sign_extend64(val, 31); 332 + 333 + return val; 324 334 } 325 335 326 336 static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+11 -10
arch/arm64/include/asm/syscall.h
··· 29 29 regs->regs[0] = regs->orig_x0; 30 30 } 31 31 32 + static inline long syscall_get_return_value(struct task_struct *task, 33 + struct pt_regs *regs) 34 + { 35 + unsigned long val = regs->regs[0]; 36 + 37 + if (is_compat_thread(task_thread_info(task))) 38 + val = sign_extend64(val, 31); 39 + 40 + return val; 41 + } 32 42 33 43 static inline long syscall_get_error(struct task_struct *task, 34 44 struct pt_regs *regs) 35 45 { 36 - unsigned long error = regs->regs[0]; 37 - 38 - if (is_compat_thread(task_thread_info(task))) 39 - error = sign_extend64(error, 31); 46 + unsigned long error = syscall_get_return_value(task, regs); 40 47 41 48 return IS_ERR_VALUE(error) ? error : 0; 42 - } 43 - 44 - static inline long syscall_get_return_value(struct task_struct *task, 45 - struct pt_regs *regs) 46 - { 47 - return regs->regs[0]; 48 49 } 49 50 50 51 static inline void syscall_set_return_value(struct task_struct *task,
+1 -1
arch/arm64/kernel/ptrace.c
··· 1862 1862 audit_syscall_exit(regs); 1863 1863 1864 1864 if (flags & _TIF_SYSCALL_TRACEPOINT) 1865 - trace_sys_exit(regs, regs_return_value(regs)); 1865 + trace_sys_exit(regs, syscall_get_return_value(current, regs)); 1866 1866 1867 1867 if (flags & (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP)) 1868 1868 tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT);
+2 -1
arch/arm64/kernel/signal.c
··· 29 29 #include <asm/unistd.h> 30 30 #include <asm/fpsimd.h> 31 31 #include <asm/ptrace.h> 32 + #include <asm/syscall.h> 32 33 #include <asm/signal32.h> 33 34 #include <asm/traps.h> 34 35 #include <asm/vdso.h> ··· 891 890 retval == -ERESTART_RESTARTBLOCK || 892 891 (retval == -ERESTARTSYS && 893 892 !(ksig.ka.sa.sa_flags & SA_RESTART)))) { 894 - regs->regs[0] = -EINTR; 893 + syscall_set_return_value(current, regs, -EINTR, 0); 895 894 regs->pc = continue_addr; 896 895 } 897 896
+3 -6
arch/arm64/kernel/syscall.c
··· 54 54 ret = do_ni_syscall(regs, scno); 55 55 } 56 56 57 - if (is_compat_task()) 58 - ret = lower_32_bits(ret); 59 - 60 - regs->regs[0] = ret; 57 + syscall_set_return_value(current, regs, 0, ret); 61 58 62 59 /* 63 60 * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), ··· 112 115 * syscall. do_notify_resume() will send a signal to userspace 113 116 * before the syscall is restarted. 114 117 */ 115 - regs->regs[0] = -ERESTARTNOINTR; 118 + syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0); 116 119 return; 117 120 } 118 121 ··· 133 136 * anyway. 134 137 */ 135 138 if (scno == NO_SYSCALL) 136 - regs->regs[0] = -ENOSYS; 139 + syscall_set_return_value(current, regs, -ENOSYS, 0); 137 140 scno = syscall_trace_enter(regs); 138 141 if (scno == NO_SYSCALL) 139 142 goto trace_exit;